From dbf11f57a7c90596af2c2560f6d10e985206adf9 Mon Sep 17 00:00:00 2001
From: sinofis
Date: Sat, 13 Jul 2019 02:04:04 +0200
Subject: [PATCH 1/9] extend mask to forehead

---
 facelib/LandmarksProcessor.py | 92 +++++++++++++++++++++++------------
 1 file changed, 62 insertions(+), 30 deletions(-)

diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py
index a49076b..1a83b10 100644
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@@ -161,43 +161,75 @@ def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None):
 
     hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32)
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[0:9],
-                          int_lmrks[17:18]))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[0:9],
+    #                       int_lmrks[17:18]))) , (1,) )
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[8:17],
-                          int_lmrks[26:27]))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[8:17],
+    #                       int_lmrks[26:27]))) , (1,) )
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[17:20],
-                          int_lmrks[8:9]))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[17:20],
+    #                       int_lmrks[8:9]))) , (1,) )
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[24:27],
-                          int_lmrks[8:9]))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[24:27],
+    #                       int_lmrks[8:9]))) , (1,) )
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[19:25],
-                          int_lmrks[8:9],
-                          ))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[19:25],
+    #                       int_lmrks[8:9],
+    #                       ))) , (1,) )
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[17:22],
-                          int_lmrks[27:28],
-                          int_lmrks[31:36],
-                          int_lmrks[8:9]
-                          ))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[17:22],
+    #                       int_lmrks[27:28],
+    #                       int_lmrks[31:36],
+    #                       int_lmrks[8:9]
+    #                       ))) , (1,) )
 
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[22:27],
-                          int_lmrks[27:28],
-                          int_lmrks[31:36],
-                          int_lmrks[8:9]
-                          ))) , (1,) )
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(
+    #     np.concatenate ( (int_lmrks[22:27],
+    #                       int_lmrks[27:28],
+    #                       int_lmrks[31:36],
+    #                       int_lmrks[8:9]
+    #                       ))) , (1,) )
 
-    #nose
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) )
+    # #nose
+    # cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) )
+    ml_pnt = (landmarks[36] + landmarks[0]) // 2
+    mr_pnt = (landmarks[16] + landmarks[45]) // 2
+
+    # mid points between the mid points and eye
+    ql_pnt = (landmarks[36] + ml_pnt) // 2
+    qr_pnt = (landmarks[45] + mr_pnt) // 2
+
+    # Top of the eye arrays
+    bot_l = np.array((ql_pnt, landmarks[36], landmarks[37], landmarks[38], landmarks[39]))
+    bot_r = np.array((landmarks[42], landmarks[43], landmarks[44], landmarks[45], qr_pnt))
+
+    # Eyebrow arrays
+    top_l = landmarks[17:22]
+    top_r = landmarks[22:27]
+
+    # Adjust eyebrow arrays
+    landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
+    landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
+
+    r_jaw = (landmarks[0:9], landmarks[17:18])
+    l_jaw = (landmarks[8:17], landmarks[26:27])
+    r_cheek = (landmarks[17:20], landmarks[8:9])
+    l_cheek = (landmarks[24:27], landmarks[8:9])
+    nose_ridge = (landmarks[19:25], landmarks[8:9],)
+    r_eye = (landmarks[17:22], landmarks[27:28], landmarks[31:36], landmarks[8:9])
+    l_eye = (landmarks[22:27], landmarks[27:28], landmarks[31:36], landmarks[8:9])
+    nose = (landmarks[27:31], landmarks[31:36])
+    parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
+
+    for item in parts:
+        merged = np.concatenate(item)
+        cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member
 
     if ie_polys is not None:
         ie_polys.overlay_mask(hull_mask)

From 9a695340b59def592ba7eacd023d56b4226a8e33 Mon Sep 17 00:00:00 2001
From: sinofis
Date: Sat, 13 Jul 2019 04:40:48 +0200
Subject: [PATCH 2/9] fix mask

---
 facelib/LandmarksProcessor.py | 38 +++++++++++++++++------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py
index 1a83b10..66c29dd 100644
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@@ -157,7 +157,7 @@ def transform_points(points, mat, invert=False):
 def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None):
     if len(image_landmarks) != 68:
         raise Exception('get_image_hull_mask works only with 68 landmarks')
-    int_lmrks = np.array(image_landmarks, dtype=np.int)
+    int_lmrks = np.array(image_landmarks.copy(), dtype=np.int)
 
     hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32)
 
@@ -198,33 +198,33 @@ def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None):
     # #nose
     # cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) )
 
-    ml_pnt = (landmarks[36] + landmarks[0]) // 2
-    mr_pnt = (landmarks[16] + landmarks[45]) // 2
+    ml_pnt = (int_lmrks[36] + int_lmrks[0]) // 2
+    mr_pnt = (int_lmrks[16] + int_lmrks[45]) // 2
 
     # mid points between the mid points and eye
-    ql_pnt = (landmarks[36] + ml_pnt) // 2
-    qr_pnt = (landmarks[45] + mr_pnt) // 2
+    ql_pnt = (int_lmrks[36] + ml_pnt) // 2
+    qr_pnt = (int_lmrks[45] + mr_pnt) // 2
 
     # Top of the eye arrays
-    bot_l = np.array((ql_pnt, landmarks[36], landmarks[37], landmarks[38], landmarks[39]))
-    bot_r = np.array((landmarks[42], landmarks[43], landmarks[44], landmarks[45], qr_pnt))
+    bot_l = np.array((ql_pnt, int_lmrks[36], int_lmrks[37], int_lmrks[38], int_lmrks[39]))
+    bot_r = np.array((int_lmrks[42], int_lmrks[43], int_lmrks[44], int_lmrks[45], qr_pnt))
 
     # Eyebrow arrays
-    top_l = landmarks[17:22]
-    top_r = landmarks[22:27]
+    top_l = int_lmrks[17:22]
+    top_r = int_lmrks[22:27]
 
     # Adjust eyebrow arrays
-    landmarks[17:22] = top_l + ((top_l - bot_l) // 2)
-    landmarks[22:27] = top_r + ((top_r - bot_r) // 2)
+    int_lmrks[17:22] = top_l + ((top_l - bot_l) // 2)
+    int_lmrks[22:27] = top_r + ((top_r - bot_r) // 2)
 
-    r_jaw = (landmarks[0:9], landmarks[17:18])
-    l_jaw = (landmarks[8:17], landmarks[26:27])
-    r_cheek = (landmarks[17:20], landmarks[8:9])
-    l_cheek = (landmarks[24:27], landmarks[8:9])
-    nose_ridge = (landmarks[19:25], landmarks[8:9],)
-    r_eye = (landmarks[17:22], landmarks[27:28], landmarks[31:36], landmarks[8:9])
-    l_eye = (landmarks[22:27], landmarks[27:28], landmarks[31:36], landmarks[8:9])
-    nose = (landmarks[27:31], landmarks[31:36])
+    r_jaw = (int_lmrks[0:9], int_lmrks[17:18])
+    l_jaw = (int_lmrks[8:17], int_lmrks[26:27])
+    r_cheek = (int_lmrks[17:20], int_lmrks[8:9])
+    l_cheek = (int_lmrks[24:27], int_lmrks[8:9])
+    nose_ridge = (int_lmrks[19:25], int_lmrks[8:9],)
+    r_eye = (int_lmrks[17:22], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9])
+    l_eye = (int_lmrks[22:27], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9])
+    nose = (int_lmrks[27:31], int_lmrks[31:36])
     parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]
 
     for item in parts:
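Taken together, PATCH 1 and PATCH 2 replace the old forehead-less hull: the eyebrow landmarks are pushed away from the eye line by half that distance, and the face is then filled as eight overlapping convex regions. A minimal standalone sketch of the same geometry (hypothetical function name; assumes landmarks is a 68x2 integer array in image coordinates and image_shape is the usual (h, w, c) tuple):

import cv2
import numpy as np

def hull_mask_with_forehead(image_shape, landmarks):
    # Work on an int32 copy: cv2.convexHull needs int32/float32 points,
    # and the eyebrow rows are modified in place below.
    lmrks = np.array(landmarks, dtype=np.int32)

    # Midpoints between the outermost jaw points and the outer eye corners,
    # then quarter points halfway from those midpoints back to the eyes.
    ml_pnt = (lmrks[36] + lmrks[0]) // 2
    mr_pnt = (lmrks[16] + lmrks[45]) // 2
    ql_pnt = (lmrks[36] + ml_pnt) // 2
    qr_pnt = (lmrks[45] + mr_pnt) // 2

    # Upper-eye contours that act as the baseline for each eyebrow.
    bot_l = np.array((ql_pnt, lmrks[36], lmrks[37], lmrks[38], lmrks[39]))
    bot_r = np.array((lmrks[42], lmrks[43], lmrks[44], lmrks[45], qr_pnt))

    # Raise each eyebrow by half its distance to that baseline,
    # which drags the hull up onto the forehead.
    lmrks[17:22] += (lmrks[17:22] - bot_l) // 2
    lmrks[22:27] += (lmrks[22:27] - bot_r) // 2

    regions = [(lmrks[0:9],   lmrks[17:18]),   # right jaw
               (lmrks[8:17],  lmrks[26:27]),   # left jaw
               (lmrks[17:20], lmrks[8:9]),     # right cheek
               (lmrks[24:27], lmrks[8:9]),     # left cheek
               (lmrks[19:25], lmrks[8:9]),     # nose ridge
               (lmrks[17:22], lmrks[27:28], lmrks[31:36], lmrks[8:9]),  # right eye
               (lmrks[22:27], lmrks[27:28], lmrks[31:36], lmrks[8:9]),  # left eye
               (lmrks[27:31], lmrks[31:36])]   # nose
    mask = np.zeros(image_shape[0:2] + (1,), dtype=np.float32)
    for region in regions:
        cv2.fillConvexPoly(mask, cv2.convexHull(np.concatenate(region)), (1,))
    return mask

Filling the face piecewise keeps the mask tight around concave areas such as the temples, which one convex hull over all 68 points would bridge over. Note that the patch itself fills with 255. where the commented-out code used (1,), so downstream consumers of the mask need to agree on that scale.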
From 84649f1b66ecc773a0013723699e3f297000fcb4 Mon Sep 17 00:00:00 2001
From: sinofis
Date: Mon, 22 Jul 2019 18:44:35 +0200
Subject: [PATCH 3/9] added random flip to override

---
 models/ModelBase.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/ModelBase.py b/models/ModelBase.py
index 0ff3154..c252d0d 100644
--- a/models/ModelBase.py
+++ b/models/ModelBase.py
@@ -127,7 +127,7 @@ class ModelBase(object):
             self.options['sort_by_yaw'] = self.options.get('sort_by_yaw', False)
 
         if ask_random_flip:
-            if (self.iter == 0):
+            if (self.iter == 0 or ask_override):
                 self.options['random_flip'] = io.input_bool("Flip faces randomly? (y/n ?:help skip:y) : ", True, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.")
             else:
                 self.options['random_flip'] = self.options.get('random_flip', True)
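PATCH 3 is a one-line change: the random-flip question is asked again whenever the user chooses to override model options at startup, instead of only on the very first iteration. The surrounding pattern, roughly (a sketch with hypothetical names, not the actual ModelBase/io API):

def ask_bool_option(options, key, default, prompt, iter_num, ask_override):
    # Prompt on the first run or when the user chose to override options;
    # otherwise keep the value saved with the model.
    if iter_num == 0 or ask_override:
        options[key] = input_bool(prompt, default)
    else:
        options[key] = options.get(key, default)

def input_bool(prompt, default):
    # Minimal stand-in for io.input_bool.
    s = input(prompt).strip().lower()
    return default if s == "" else s in ("y", "yes", "1", "true")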
From a2d5369051a4cb32535934483d141e3cc0e66e0b Mon Sep 17 00:00:00 2001
From: seranus <=>
Date: Sun, 28 Jul 2019 11:40:43 +0200
Subject: [PATCH 4/9] update linux

---
 .github/ISSUE_TEMPLATE.md                 |   36 +-
 .gitignore                                |   14 +-
 CODEGUIDELINES                            |    8 +-
 LICENSE                                   | 1348 ++++----
 README.md                                 |   98 +-
 converters/Converter.py                   |  100 +-
 converters/ConverterAvatar.py             |  140 +-
 converters/ConverterImage.py              |  100 +-
 converters/ConverterMasked.py             |  870 +++---
 converters/__init__.py                    |    8 +-
 doc/doc_build_and_repository_info.md      |    8 +-
 doc/doc_prebuilt_windows_app.md           |   48 +-
 doc/doc_ready_to_work_facesets.md         |   22 +-
 doc/gallery/doc_gallery.md                |    4 +-
 facelib/DLIBExtractor.py                  |   80 +-
 facelib/FANSegmentator.py                 |  278 +-
 facelib/FaceType.py                       |   66 +-
 facelib/LandmarksExtractor.py             |  240 +-
 facelib/LandmarksProcessor.py             |  772 ++---
 facelib/MTCExtractor.py                   |  700 ++---
 facelib/PoseEstimator.py                  |  626 ++--
 facelib/S3FDExtractor.py                  |  196 +-
 facelib/__init__.py                       |   12 +-
 imagelib/DCSCN.py                         |  328 +-
 imagelib/IEPolys.py                       |  206 +-
 imagelib/__init__.py                      |   52 +-
 imagelib/blur.py                          |  284 +-
 imagelib/color_transfer.py                |  382 +--
 imagelib/common.py                        |   40 +-
 imagelib/draw.py                          |   26 +-
 imagelib/equalize_and_stack_square.py     |   88 +-
 imagelib/estimate_sharpness.py            |  554 ++--
 imagelib/morph.py                         |   72 +-
 imagelib/reduce_colors.py                 |   28 +-
 imagelib/text.py                          |  128 +-
 imagelib/warp.py                          |  100 +-
 interact/__init__.py                      |    2 +-
 interact/interact.py                      |  808 ++---
 joblib/SubprocessFunctionCaller.py        |   84 +-
 joblib/SubprocessorBase.py                |  576 ++--
 joblib/__init__.py                        |    4 +-
 localization/__init__.py                  |    4 +-
 localization/localization.py              |   60 +-
 main.py                                   |  552 ++--
 mainscripts/Converter.py                  |  782 ++---
 mainscripts/Extractor.py                  | 1746 +++++------
 mainscripts/MaskEditorTool.py             | 1112 +++----
 mainscripts/Sorter.py                     | 1606 +++++-----
 mainscripts/Trainer.py                    |  648 ++--
 mainscripts/Util.py                       |  312 +-
 mainscripts/VideoEd.py                    |  398 +--
 mathlib/__init__.py                       |   50 +-
 mathlib/umeyama.py                        |  142 +-
 models/ModelBase.py                       | 1230 ++++----
 models/Model_DEV_FANSEG/Model.py          |  204 +-
 models/Model_DEV_FANSEG/__init__.py       |    2 +-
 models/Model_DEV_POSEEST/Model.py         |  242 +-
 models/Model_DEV_POSEEST/__init__.py      |    2 +-
 models/Model_DF/Model.py                  |  342 +-
 models/Model_DF/__init__.py               |    2 +-
 models/Model_H128/Model.py                |  410 +--
 models/Model_H128/__init__.py             |    2 +-
 models/Model_H64/Model.py                 |  404 +--
 models/Model_H64/__init__.py              |    2 +-
 models/Model_LIAEF128/Model.py            |  360 +--
 models/Model_LIAEF128/__init__.py         |    2 +-
 models/Model_RecycleGAN/Model.py          |  966 +++---
 models/Model_SAE/Model.py                 | 1400 ++++-----
 models/Model_SAE/__init__.py              |    2 +-
 models/__init__.py                        |   10 +-
 nnlib/CAInitializer.py                    |  224 +-
 nnlib/__init__.py                         |    2 +-
 nnlib/device.py                           |  714 ++---
 nnlib/nnlib.py                            | 2096 ++++++------
 nnlib/pynvml.py                           | 3452 ++++++++++-----------
 requirements-colab.txt                    |   18 +-
 requirements-cpu.txt                      |   18 +-
 requirements-cuda.txt                     |   22 +-
 requirements-opencl.txt                   |   22 +-
 samplelib/Sample.py                       |  148 +-
 samplelib/SampleGeneratorBase.py          |   48 +-
 samplelib/SampleGeneratorFace.py          |  284 +-
 samplelib/SampleGeneratorFaceTemporal.py  |  168 +-
 samplelib/SampleGeneratorImageTemporal.py |  156 +-
 samplelib/SampleLoader.py                 |  304 +-
 samplelib/SampleProcessor.py              |  632 ++--
 samplelib/__init__.py                     |   16 +-
 utils/DFLJPG.py                           |  606 ++--
 utils/DFLPNG.py                           |  820 ++---
 utils/Path_utils.py                       |  164 +-
 utils/cv2_utils.py                        |   44 +-
 utils/iter_utils.py                       |  140 +-
 utils/os_utils.py                         |   50 +-
 utils/pickle_utils.py                     |   16 +-
 utils/random_utils.py                     |   28 +-
 utils/std_utils.py                        |   72 +-
 utils/struct_utils.py                     |   10 +-
 97 files changed, 16402 insertions(+), 16402 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index e8ce932..f3d856b 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -1,19 +1,19 @@
-THIS IS NOT TECH SUPPORT FOR NEWBIE FAKERS
-POST ONLY ISSUES RELATED TO BUGS OR CODE
-
-## Expected behavior
-
-*Describe, in some detail, what you are trying to do and what the output is that you expect from the program.*
-
-## Actual behavior
-
-*Describe, in some detail, what the program does instead. Be sure to include any error message or screenshots.*
-
-## Steps to reproduce
-
-*Describe, in some detail, the steps you tried that resulted in the behavior described above.*
-
-## Other relevant information
-- **Command lined used (if not specified in steps to reproduce)**: main.py ...
-- **Operating system and version:** Windows, macOS, Linux
+THIS IS NOT TECH SUPPORT FOR NEWBIE FAKERS
+POST ONLY ISSUES RELATED TO BUGS OR CODE
+
+## Expected behavior
+
+*Describe, in some detail, what you are trying to do and what the output is that you expect from the program.*
+
+## Actual behavior
+
+*Describe, in some detail, what the program does instead. Be sure to include any error message or screenshots.*
+
+## Steps to reproduce
+
+*Describe, in some detail, the steps you tried that resulted in the behavior described above.*
+
+## Other relevant information
+- **Command lined used (if not specified in steps to reproduce)**: main.py ...
+- **Operating system and version:** Windows, macOS, Linux
 - **Python version:** 3.5, 3.6.4, ... (if you are not using prebuilt windows binary)
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 16a6020..55123e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
-*
-!*.py
-!*.md
-!*.txt
-!*.jpg
-!requirements*
-!Dockerfile*
+*
+!*.py
+!*.md
+!*.txt
+!*.jpg
+!requirements*
+!Dockerfile*
 !*.sh
\ No newline at end of file
diff --git a/CODEGUIDELINES b/CODEGUIDELINES
index 0d40a02..7e6541e 100644
--- a/CODEGUIDELINES
+++ b/CODEGUIDELINES
@@ -1,5 +1,5 @@
-Please don't ruin the code and this good (as I think) architecture.
-
-Please follow the same logic and brevity/pithiness.
-
+Please don't ruin the code and this good (as I think) architecture.
+
+Please follow the same logic and brevity/pithiness.
+
 Don't abstract the code into huge classes if you only win some lines of code in one place, because this can prevent programmers from understanding it quickly.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 94a9ed0..818433e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,674 +1,674 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. 
To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. 
However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. 
- - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. 
- - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. 
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<https://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<https://www.gnu.org/licenses/why-not-lgpl.html>.
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index a5971b7..7ffd8b9 100644
--- a/README.md
+++ b/README.md
@@ -1,49 +1,49 @@
-![](doc/DFL_welcome.jpg)
-
-![](doc/logo_cuda.jpg)![](doc/logo_opencl.jpg)![](doc/logo_keras.jpg)![](doc/logo_tensorflow.jpg)![](doc/logo_plaidml.jpg)
-
-#deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets
-
-## **DeepFaceLab** is a tool that utilizes machine learning to replace faces in videos.
-
-If you like this software, please consider a donation.
-
-GOAL: next DeepFacelab update.
- -[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065) - -[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted) - -[Donate via Alipay](https://i.loli.net/2019/01/13/5c3ae3829809f.jpg) - -bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr - -- ### [Gallery](doc/gallery/doc_gallery.md) - -- ### Manuals: - -[English (google translated)](doc/manual_en_google_translated.pdf) - -[На русском](doc/manual_ru.pdf) - -- ### [Prebuilt windows app](doc/doc_prebuilt_windows_app.md) - -- ### Forks - -[Google Colab fork](https://github.com/chervonij/DFL-Colab) by @chervonij - -[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs - may be outdated - -- ### [Ready to work facesets](doc/doc_ready_to_work_facesets.md) - -- ### [Build and repository info](doc/doc_build_and_repository_info.md) - -- ### Communication groups: - -(Chinese) QQ group 951138799 for ML/AI experts - -[deepfakes (Chinese)](https://deepfakescn.com) - -[deepfakes (Chinese) (outdated) ](https://deepfakes.com.cn/) - -[reddit (English)](https://www.reddit.com/r/GifFakes/new/) +![](doc/DFL_welcome.jpg) + +![](doc/logo_cuda.jpg)![](doc/logo_opencl.jpg)![](doc/logo_keras.jpg)![](doc/logo_tensorflow.jpg)![](doc/logo_plaidml.jpg) + +#deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets + +## **DeepFaceLab** is a tool that utilizes machine learning to replace faces in videos. + +If you like this software, please consider a donation. + +GOAL: next DeepFacelab update. + +[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065) + +[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted) + +[Donate via Alipay](https://i.loli.net/2019/01/13/5c3ae3829809f.jpg) + +bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr + +- ### [Gallery](doc/gallery/doc_gallery.md) + +- ### Manuals: + +[English (google translated)](doc/manual_en_google_translated.pdf) + +[На русском](doc/manual_ru.pdf) + +- ### [Prebuilt windows app](doc/doc_prebuilt_windows_app.md) + +- ### Forks + +[Google Colab fork](https://github.com/chervonij/DFL-Colab) by @chervonij + +[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs - may be outdated + +- ### [Ready to work facesets](doc/doc_ready_to_work_facesets.md) + +- ### [Build and repository info](doc/doc_build_and_repository_info.md) + +- ### Communication groups: + +(Chinese) QQ group 951138799 for ML/AI experts + +[deepfakes (Chinese)](https://deepfakescn.com) + +[deepfakes (Chinese) (outdated) ](https://deepfakes.com.cn/) + +[reddit (English)](https://www.reddit.com/r/GifFakes/new/) diff --git a/converters/Converter.py b/converters/Converter.py index b4e4213..f6e90d1 100644 --- a/converters/Converter.py +++ b/converters/Converter.py @@ -1,50 +1,50 @@ -import copy -''' -You can implement your own Converter, check example ConverterMasked.py -''' - -class Converter(object): - TYPE_FACE = 0 #calls convert_face - TYPE_FACE_AVATAR = 1 #calls convert_face with avatar_operator_face - TYPE_IMAGE = 2 #calls convert_image without landmarks - TYPE_IMAGE_WITH_LANDMARKS = 3 #calls convert_image with landmarks - - #overridable - def __init__(self, predictor_func, type): - 
self.predictor_func = predictor_func - self.type = type - - #overridable - def on_cli_initialize(self): - #cli initialization - pass - - #overridable - def on_host_tick(self): - pass - - #overridable - def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): - #return float32 image - #if debug , return tuple ( images of any size and channels, ...) - return image - - #overridable - def cli_convert_image (self, img_bgr, img_landmarks, debug): - #img_landmarks not None, if input image is png with embedded data - #return float32 image - #if debug , return tuple ( images of any size and channels, ...) - return image - - #overridable - def dummy_predict(self): - #do dummy predict here - pass - - def copy(self): - return copy.copy(self) - - def copy_and_set_predictor(self, predictor_func): - result = self.copy() - result.predictor_func = predictor_func - return result +import copy +''' +You can implement your own Converter, check example ConverterMasked.py +''' + +class Converter(object): + TYPE_FACE = 0 #calls convert_face + TYPE_FACE_AVATAR = 1 #calls convert_face with avatar_operator_face + TYPE_IMAGE = 2 #calls convert_image without landmarks + TYPE_IMAGE_WITH_LANDMARKS = 3 #calls convert_image with landmarks + + #overridable + def __init__(self, predictor_func, type): + self.predictor_func = predictor_func + self.type = type + + #overridable + def on_cli_initialize(self): + #cli initialization + pass + + #overridable + def on_host_tick(self): + pass + + #overridable + def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): + #return float32 image + #if debug , return tuple ( images of any size and channels, ...) + return image + + #overridable + def cli_convert_image (self, img_bgr, img_landmarks, debug): + #img_landmarks not None, if input image is png with embedded data + #return float32 image + #if debug , return tuple ( images of any size and channels, ...) + return image + + #overridable + def dummy_predict(self): + #do dummy predict here + pass + + def copy(self): + return copy.copy(self) + + def copy_and_set_predictor(self, predictor_func): + result = self.copy() + result.predictor_func = predictor_func + return result diff --git a/converters/ConverterAvatar.py b/converters/ConverterAvatar.py index 4a9e9b5..35da3bc 100644 --- a/converters/ConverterAvatar.py +++ b/converters/ConverterAvatar.py @@ -1,70 +1,70 @@ -import time - -import cv2 -import numpy as np - -from facelib import FaceType, LandmarksProcessor -from joblib import SubprocessFunctionCaller -from utils.pickle_utils import AntiPickler - -from .Converter import Converter - -class ConverterAvatar(Converter): - - #override - def __init__(self, predictor_func, - predictor_input_size=0): - - super().__init__(predictor_func, Converter.TYPE_FACE_AVATAR) - - self.predictor_input_size = predictor_input_size - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower - predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ), - np.zeros ( (predictor_input_size,predictor_input_size,1), dtype=np.float32 ) ) - time.sleep(2) - - predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) - self.predictor_func_host = AntiPickler(predictor_func_host) - self.predictor_func = predictor_func - - #overridable - def on_host_tick(self): - self.predictor_func_host.obj.process_messages() - - #override - def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): - if debug: - debugs = [img_bgr.copy()] - - img_size = img_bgr.shape[1], img_bgr.shape[0] - - img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) - img_face_mask_aaa = np.repeat(img_face_mask_a, 3, -1) - - output_size = self.predictor_input_size - face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=FaceType.FULL) - - dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC ) - - predictor_input_dst_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) - prd_inp_dst_face_mask_a = predictor_input_dst_face_mask_a_0[...,np.newaxis] - - prd_inp_avaperator_face_bgr = cv2.resize (avaperator_face_bgr, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) - - prd_face_bgr = self.predictor_func ( prd_inp_avaperator_face_bgr, prd_inp_dst_face_mask_a ) - - out_img = img_bgr.copy() - out_img = cv2.warpAffine( prd_face_bgr, face_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip(out_img, 0.0, 1.0) - - if debug: - debugs += [out_img.copy()] - - out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 ) - - if debug: - debugs += [out_img.copy()] - - - return debugs if debug else out_img +import time + +import cv2 +import numpy as np + +from facelib import FaceType, LandmarksProcessor +from joblib import SubprocessFunctionCaller +from utils.pickle_utils import AntiPickler + +from .Converter import Converter + +class ConverterAvatar(Converter): + + #override + def __init__(self, predictor_func, + predictor_input_size=0): + + super().__init__(predictor_func, Converter.TYPE_FACE_AVATAR) + + self.predictor_input_size = predictor_input_size + + #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower + predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ), + np.zeros ( (predictor_input_size,predictor_input_size,1), dtype=np.float32 ) ) + time.sleep(2) + + predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) + self.predictor_func_host = AntiPickler(predictor_func_host) + self.predictor_func = predictor_func + + #overridable + def on_host_tick(self): + self.predictor_func_host.obj.process_messages() + + #override + def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): + if debug: + debugs = [img_bgr.copy()] + + img_size = img_bgr.shape[1], img_bgr.shape[0] + + img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) + img_face_mask_aaa = np.repeat(img_face_mask_a, 3, -1) + + output_size = self.predictor_input_size + face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=FaceType.FULL) + + dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC ) + + predictor_input_dst_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) + prd_inp_dst_face_mask_a = predictor_input_dst_face_mask_a_0[...,np.newaxis] + + prd_inp_avaperator_face_bgr = cv2.resize (avaperator_face_bgr, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) + + prd_face_bgr = self.predictor_func ( prd_inp_avaperator_face_bgr, prd_inp_dst_face_mask_a ) + + out_img = img_bgr.copy() + out_img = cv2.warpAffine( prd_face_bgr, face_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip(out_img, 0.0, 1.0) + + if debug: + debugs += [out_img.copy()] + + out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 ) + + if debug: + debugs += [out_img.copy()] + + + return debugs if debug else out_img diff --git a/converters/ConverterImage.py b/converters/ConverterImage.py index 58b1faa..8324002 100644 --- a/converters/ConverterImage.py +++ b/converters/ConverterImage.py @@ -1,50 +1,50 @@ -import time - -import cv2 -import numpy as np - -from facelib import FaceType, LandmarksProcessor -from joblib import SubprocessFunctionCaller -from utils.pickle_utils import AntiPickler - -from .Converter import Converter - -class ConverterImage(Converter): - - #override - def __init__(self, predictor_func, - predictor_input_size=0): - - super().__init__(predictor_func, Converter.TYPE_IMAGE) - - self.predictor_input_size = predictor_input_size - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower - predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) - time.sleep(2) - - predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) - self.predictor_func_host = AntiPickler(predictor_func_host) - self.predictor_func = predictor_func - - #overridable - def on_host_tick(self): - self.predictor_func_host.obj.process_messages() - - #override - def cli_convert_image (self, img_bgr, img_landmarks, debug): - img_size = img_bgr.shape[1], img_bgr.shape[0] - - predictor_input_bgr = cv2.resize ( img_bgr, (self.predictor_input_size, self.predictor_input_size), cv2.INTER_LANCZOS4 ) - - if debug: - debugs = [predictor_input_bgr] - - output = self.predictor_func ( predictor_input_bgr ) - - if debug: - return (predictor_input_bgr,output,) - if debug: - debugs += [out_img.copy()] - - return debugs if debug else output +import time + +import cv2 +import numpy as np + +from facelib import FaceType, LandmarksProcessor +from joblib import SubprocessFunctionCaller +from utils.pickle_utils import AntiPickler + +from .Converter import Converter + +class ConverterImage(Converter): + + #override + def __init__(self, predictor_func, + predictor_input_size=0): + + super().__init__(predictor_func, Converter.TYPE_IMAGE) + + self.predictor_input_size = predictor_input_size + + #dummy predict and sleep, tensorflow caching kernels. If remove it, conversion speed will be x2 slower + predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) + time.sleep(2) + + predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) + self.predictor_func_host = AntiPickler(predictor_func_host) + self.predictor_func = predictor_func + + #overridable + def on_host_tick(self): + self.predictor_func_host.obj.process_messages() + + #override + def cli_convert_image (self, img_bgr, img_landmarks, debug): + img_size = img_bgr.shape[1], img_bgr.shape[0] + + predictor_input_bgr = cv2.resize ( img_bgr, (self.predictor_input_size, self.predictor_input_size), cv2.INTER_LANCZOS4 ) + + if debug: + debugs = [predictor_input_bgr] + + output = self.predictor_func ( predictor_input_bgr ) + + if debug: + return (predictor_input_bgr,output,) + if debug: + debugs += [out_img.copy()] + + return debugs if debug else output diff --git a/converters/ConverterMasked.py b/converters/ConverterMasked.py index 34e494c..cfa07fa 100644 --- a/converters/ConverterMasked.py +++ b/converters/ConverterMasked.py @@ -1,435 +1,435 @@ -import time -import traceback - -import cv2 -import numpy as np - -import imagelib -from facelib import FaceType, FANSegmentator, LandmarksProcessor -from interact import interact as io -from joblib import SubprocessFunctionCaller -from utils.pickle_utils import AntiPickler - -from .Converter import Converter - - -''' -default_mode = {1:'overlay', - 2:'hist-match', - 3:'hist-match-bw', - 4:'seamless', - 5:'seamless-hist-match', - 6:'raw'} -''' -class ConverterMasked(Converter): - - #override - def __init__(self, predictor_func, - predictor_input_size=0, - predictor_masked=True, - face_type=FaceType.FULL, - default_mode = 4, - base_erode_mask_modifier = 0, - base_blur_mask_modifier = 0, - default_erode_mask_modifier = 0, - default_blur_mask_modifier = 0, - clip_hborder_mask_per = 0, - force_mask_mode=-1): - - super().__init__(predictor_func, Converter.TYPE_FACE) - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower - predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) - time.sleep(2) - - predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) - self.predictor_func_host = AntiPickler(predictor_func_host) - self.predictor_func = predictor_func - - self.predictor_masked = predictor_masked - self.predictor_input_size = predictor_input_size - self.face_type = face_type - self.clip_hborder_mask_per = clip_hborder_mask_per - - mode = io.input_int ("Choose mode: (1) overlay, (2) hist match, (3) hist match bw, (4) seamless, (5) raw. Default - %d : " % (default_mode) , default_mode) - - mode_dict = {1:'overlay', - 2:'hist-match', - 3:'hist-match-bw', - 4:'seamless', - 5:'raw'} - - self.mode = mode_dict.get (mode, mode_dict[default_mode] ) - - if self.mode == 'raw': - mode = io.input_int ("Choose raw mode: (1) rgb, (2) rgb+mask (default), (3) mask only, (4) predicted only : ", 2) - self.raw_mode = {1:'rgb', - 2:'rgb-mask', - 3:'mask-only', - 4:'predicted-only'}.get (mode, 'rgb-mask') - - if self.mode != 'raw': - - if self.mode == 'seamless': - if io.input_bool("Seamless hist match? (y/n skip:n) : ", False): - self.mode = 'seamless-hist-match' - - if self.mode == 'hist-match' or self.mode == 'hist-match-bw': - self.masked_hist_match = io.input_bool("Masked hist match? (y/n skip:y) : ", True) - - if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': - self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) - - if force_mask_mode != -1: - self.mask_mode = force_mask_mode - else: - if face_type == FaceType.FULL: - self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst, (3) FAN-prd, (4) FAN-dst , (5) FAN-prd*FAN-dst (6) learned*FAN-prd*FAN-dst (?) help. Default - %d : " % (1) , 1, help_message="If you learned mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks."), 1, 6 ) - else: - self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst . Default - %d : " % (1) , 1), 1, 2 ) - - if self.mask_mode >= 3 and self.mask_mode <= 6: - self.fan_seg = None - - if self.mode != 'raw': - self.erode_mask_modifier = base_erode_mask_modifier + np.clip ( io.input_int ("Choose erode mask modifier [-200..200] (skip:%d) : " % (default_erode_mask_modifier), default_erode_mask_modifier), -200, 200) - self.blur_mask_modifier = base_blur_mask_modifier + np.clip ( io.input_int ("Choose blur mask modifier [-200..200] (skip:%d) : " % (default_blur_mask_modifier), default_blur_mask_modifier), -200, 200) - - self.output_face_scale = np.clip ( 1.0 + io.input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0)*0.01, 0.5, 1.5) - - if self.mode != 'raw': - self.color_transfer_mode = io.input_str ("Apply color transfer to predicted face? Choose mode ( rct/lct skip:None ) : ", None, ['rct','lct']) - - self.super_resolution = io.input_bool("Apply super resolution? 
(y/n ?:help skip:n) : ", False, help_message="Enhance details by applying DCSCN network.") - - if self.mode != 'raw': - self.final_image_color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) - self.alpha = io.input_bool("Export png with alpha channel? (y/n skip:n) : ", False) - - io.log_info ("") - - if self.super_resolution: - host_proc, dc_upscale = SubprocessFunctionCaller.make_pair( imagelib.DCSCN().upscale ) - self.dc_host = AntiPickler(host_proc) - self.dc_upscale = dc_upscale - else: - self.dc_host = None - - #overridable - def on_host_tick(self): - self.predictor_func_host.obj.process_messages() - - if self.dc_host is not None: - self.dc_host.obj.process_messages() - - #overridable - def on_cli_initialize(self): - if (self.mask_mode >= 3 and self.mask_mode <= 6) and self.fan_seg == None: - self.fan_seg = FANSegmentator(256, FaceType.toString( self.face_type ) ) - - #override - def cli_convert_face (self, img_bgr, img_face_landmarks, debug, **kwargs): - if debug: - debugs = [img_bgr.copy()] - - img_size = img_bgr.shape[1], img_bgr.shape[0] - - img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) - - output_size = self.predictor_input_size - if self.super_resolution: - output_size *= 2 - - face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type) - face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type, scale=self.output_face_scale) - - dst_face_bgr = cv2.warpAffine( img_bgr , face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) - dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) - - predictor_input_bgr = cv2.resize (dst_face_bgr, (self.predictor_input_size,self.predictor_input_size)) - - if self.predictor_masked: - prd_face_bgr, prd_face_mask_a_0 = self.predictor_func (predictor_input_bgr) - - prd_face_bgr = np.clip (prd_face_bgr, 0, 1.0 ) - prd_face_mask_a_0 = np.clip (prd_face_mask_a_0, 0.0, 1.0) - else: - predicted = self.predictor_func (predictor_input_bgr) - prd_face_bgr = np.clip (predicted, 0, 1.0 ) - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size)) - - if self.super_resolution: - if debug: - tmp = cv2.resize (prd_face_bgr, (output_size,output_size), cv2.INTER_CUBIC) - debugs += [ np.clip( cv2.warpAffine( tmp, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - prd_face_bgr = self.dc_upscale(prd_face_bgr) - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - if self.predictor_masked: - prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) - else: - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) - - if self.mask_mode == 2: #dst - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - elif self.mask_mode >= 3 and self.mask_mode <= 6: - - if self.mask_mode == 3 or self.mask_mode == 5 or self.mask_mode == 6: - prd_face_bgr_256 = cv2.resize (prd_face_bgr, (256,256) ) - prd_face_bgr_256_mask = self.fan_seg.extract( prd_face_bgr_256 ) - FAN_prd_face_mask_a_0 = cv2.resize (prd_face_bgr_256_mask, 
(output_size,output_size), cv2.INTER_CUBIC) - - if self.mask_mode == 4 or self.mask_mode == 5 or self.mask_mode == 6: - face_256_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, 256, face_type=FaceType.FULL) - dst_face_256_bgr = cv2.warpAffine(img_bgr, face_256_mat, (256, 256), flags=cv2.INTER_LANCZOS4 ) - dst_face_256_mask = self.fan_seg.extract( dst_face_256_bgr ) - FAN_dst_face_mask_a_0 = cv2.resize (dst_face_256_mask, (output_size,output_size), cv2.INTER_CUBIC) - - if self.mask_mode == 3: #FAN-prd - prd_face_mask_a_0 = FAN_prd_face_mask_a_0 - elif self.mask_mode == 4: #FAN-dst - prd_face_mask_a_0 = FAN_dst_face_mask_a_0 - elif self.mask_mode == 5: - prd_face_mask_a_0 = FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 - elif self.mask_mode == 6: - prd_face_mask_a_0 = prd_face_mask_a_0 * FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 - - prd_face_mask_a_0[ prd_face_mask_a_0 < 0.001 ] = 0.0 - - prd_face_mask_a = prd_face_mask_a_0[...,np.newaxis] - prd_face_mask_aaa = np.repeat (prd_face_mask_a, (3,), axis=-1) - - img_face_mask_aaa = cv2.warpAffine( prd_face_mask_aaa, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) - img_face_mask_aaa = np.clip (img_face_mask_aaa, 0.0, 1.0) - img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise - - if debug: - debugs += [img_face_mask_aaa.copy()] - - - out_img = img_bgr.copy() - - if self.mode == 'raw': - if self.raw_mode == 'rgb' or self.raw_mode == 'rgb-mask': - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - - if self.raw_mode == 'rgb-mask': - out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 ) - - if self.raw_mode == 'mask-only': - out_img = img_face_mask_aaa - - if self.raw_mode == 'predicted-only': - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(out_img.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - - else: - #averaging [lenx, leny, maskx, masky] by grayscale gradients of upscaled mask - ar = [] - for i in range(1, 10): - maxregion = np.argwhere( img_face_mask_aaa > i / 10.0 ) - if maxregion.size != 0: - miny,minx = maxregion.min(axis=0)[:2] - maxy,maxx = maxregion.max(axis=0)[:2] - lenx = maxx - minx - leny = maxy - miny - if min(lenx,leny) >= 4: - ar += [ [ lenx, leny] ] - - if len(ar) > 0: - lenx, leny = np.mean ( ar, axis=0 ) - lowest_len = min (lenx, leny) - if debug: - io.log_info ("lenx/leny:(%d/%d) " % (lenx, leny ) ) - io.log_info ("lowest_len = %f" % (lowest_len) ) - - if self.erode_mask_modifier != 0: - ero = int( lowest_len * ( 0.126 - lowest_len * 0.00004551365 ) * 0.01*self.erode_mask_modifier ) - if debug: - io.log_info ("erode_size = %d" % (ero) ) - if ero > 0: - img_face_mask_aaa = cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) - elif ero < 0: - img_face_mask_aaa = cv2.dilate(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) - - img_mask_blurry_aaa = img_face_mask_aaa - - if self.clip_hborder_mask_per > 0: #clip hborder before blur - prd_hborder_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=np.float32) - prd_border_size = int ( prd_hborder_rect_mask_a.shape[1] * self.clip_hborder_mask_per ) - prd_hborder_rect_mask_a[:,0:prd_border_size,:] = 0 - prd_hborder_rect_mask_a[:,-prd_border_size:,:] = 0 - prd_hborder_rect_mask_a 
= np.expand_dims(cv2.blur(prd_hborder_rect_mask_a, (prd_border_size, prd_border_size) ),-1) - - img_prd_hborder_rect_mask_a = cv2.warpAffine( prd_hborder_rect_mask_a, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) - img_prd_hborder_rect_mask_a = np.expand_dims (img_prd_hborder_rect_mask_a, -1) - img_mask_blurry_aaa *= img_prd_hborder_rect_mask_a - img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) - - if debug: - debugs += [img_mask_blurry_aaa.copy()] - - if self.blur_mask_modifier > 0: - blur = int( lowest_len * 0.10 * 0.01*self.blur_mask_modifier ) - if debug: - io.log_info ("blur_size = %d" % (blur) ) - if blur > 0: - img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) ) - - img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) - face_mask_blurry_aaa = cv2.warpAffine( img_mask_blurry_aaa, face_mat, (output_size, output_size) ) - - if debug: - debugs += [img_mask_blurry_aaa.copy()] - - if 'seamless' not in self.mode and self.color_transfer_mode is not None: - if self.color_transfer_mode == 'rct': - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (prd_face_bgr*255).astype(np.uint8), 0, 255), - np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), - source_mask=prd_face_mask_a, target_mask=prd_face_mask_a) - prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - - elif self.color_transfer_mode == 'lct': - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr) - prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - if self.mode == 'hist-match-bw': - prd_face_bgr = cv2.cvtColor(prd_face_bgr, cv2.COLOR_BGR2GRAY) - prd_face_bgr = np.repeat( np.expand_dims (prd_face_bgr, -1), (3,), -1 ) - - if self.mode == 'hist-match' or self.mode == 'hist-match-bw': - if debug: - debugs += [ cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) ] - - hist_mask_a = np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) - - if self.masked_hist_match: - hist_mask_a *= prd_face_mask_a - - white = (1.0-hist_mask_a)* np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) - - hist_match_1 = prd_face_bgr*hist_mask_a + white - hist_match_1[ hist_match_1 > 1.0 ] = 1.0 - - hist_match_2 = dst_face_bgr*hist_mask_a + white - hist_match_2[ hist_match_1 > 1.0 ] = 1.0 - - prd_face_bgr = imagelib.color_hist_match(hist_match_1, hist_match_2, self.hist_match_threshold ) - - #if self.masked_hist_match: - # prd_face_bgr -= white - - if self.mode == 'hist-match-bw': - prd_face_bgr = 
prd_face_bgr.astype(dtype=np.float32) - - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip(out_img, 0.0, 1.0) - - if debug: - debugs += [out_img.copy()] - - if self.mode == 'overlay': - pass - - if 'seamless' in self.mode: - #mask used for cv2.seamlessClone - img_face_seamless_mask_a = None - img_face_mask_a = img_mask_blurry_aaa[...,0:1] - for i in range(1,10): - a = img_face_mask_a > i / 10.0 - if len(np.argwhere(a)) == 0: - continue - img_face_seamless_mask_a = img_mask_blurry_aaa[...,0:1].copy() - img_face_seamless_mask_a[a] = 1.0 - img_face_seamless_mask_a[img_face_seamless_mask_a <= i / 10.0] = 0.0 - break - - try: - #calc same bounding rect and center point as in cv2.seamlessClone to prevent jittering - l,t,w,h = cv2.boundingRect( (img_face_seamless_mask_a*255).astype(np.uint8) ) - s_maskx, s_masky = int(l+w/2), int(t+h/2) - - out_img = cv2.seamlessClone( (out_img*255).astype(np.uint8), (img_bgr*255).astype(np.uint8), (img_face_seamless_mask_a*255).astype(np.uint8), (s_maskx,s_masky) , cv2.NORMAL_CLONE ) - out_img = out_img.astype(dtype=np.float32) / 255.0 - except Exception as e: - #seamlessClone may fail in some cases - e_str = traceback.format_exc() - - if 'MemoryError' in e_str: - raise Exception("Seamless fail: " + e_str) #reraise MemoryError in order to reprocess this data by other processes - else: - print ("Seamless fail: " + e_str) - - if debug: - debugs += [out_img.copy()] - - out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (out_img*img_mask_blurry_aaa) , 0, 1.0 ) - - if 'seamless' in self.mode and self.color_transfer_mode is not None: - out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) - - if self.color_transfer_mode == 'rct': - if debug: - debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - new_out_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (out_face_bgr*255).astype(np.uint8), 0, 255), - np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), - source_mask=face_mask_blurry_aaa, target_mask=face_mask_blurry_aaa) - new_out_face_bgr = np.clip( new_out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - - elif self.color_transfer_mode == 'lct': - if debug: - debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - new_out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr) - new_out_face_bgr = np.clip( new_out_face_bgr, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 ) - - if self.mode == 'seamless-hist-match': - out_face_bgr = cv2.warpAffine( out_img, face_mat, 
(output_size, output_size) )
-            new_out_face_bgr = imagelib.color_hist_match(out_face_bgr, dst_face_bgr, self.hist_match_threshold)
-            new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT )
-            out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 )
-
-        if self.final_image_color_degrade_power != 0:
-            if debug:
-                debugs += [out_img.copy()]
-            out_img_reduced = imagelib.reduce_colors(out_img, 256)
-            if self.final_image_color_degrade_power == 100:
-                out_img = out_img_reduced
-            else:
-                alpha = self.final_image_color_degrade_power / 100.0
-                out_img = (out_img*(1.0-alpha) + out_img_reduced*alpha)
-
-        if self.alpha:
-            out_img = np.concatenate ( [out_img, np.expand_dims (img_mask_blurry_aaa[:,:,0],-1)], -1 )
-
-        out_img = np.clip (out_img, 0.0, 1.0 )
-
-        if debug:
-            debugs += [out_img.copy()]
-
-        return debugs if debug else out_img
+import time
+import traceback
+
+import cv2
+import numpy as np
+
+import imagelib
+from facelib import FaceType, FANSegmentator, LandmarksProcessor
+from interact import interact as io
+from joblib import SubprocessFunctionCaller
+from utils.pickle_utils import AntiPickler
+
+from .Converter import Converter
+
+
+'''
+default_mode = {1:'overlay',
+                2:'hist-match',
+                3:'hist-match-bw',
+                4:'seamless',
+                5:'seamless-hist-match',
+                6:'raw'}
+'''
+class ConverterMasked(Converter):
+
+    #override
+    def __init__(self,  predictor_func,
+                        predictor_input_size=0,
+                        predictor_masked=True,
+                        face_type=FaceType.FULL,
+                        default_mode = 4,
+                        base_erode_mask_modifier = 0,
+                        base_blur_mask_modifier = 0,
+                        default_erode_mask_modifier = 0,
+                        default_blur_mask_modifier = 0,
+                        clip_hborder_mask_per = 0,
+                        force_mask_mode=-1):
+
+        super().__init__(predictor_func, Converter.TYPE_FACE)
+
+        #dummy predict and sleep: TensorFlow caches its kernels on the first call; without this warm-up, conversion runs about 2x slower
+        predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) )
+        time.sleep(2)
+
+        predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func)
+        self.predictor_func_host = AntiPickler(predictor_func_host)
+        self.predictor_func = predictor_func
+
+        self.predictor_masked = predictor_masked
+        self.predictor_input_size = predictor_input_size
+        self.face_type = face_type
+        self.clip_hborder_mask_per = clip_hborder_mask_per
+
+        mode = io.input_int ("Choose mode: (1) overlay, (2) hist match, (3) hist match bw, (4) seamless, (5) raw. Default - %d : " % (default_mode) , default_mode)
+
+        mode_dict = {1:'overlay',
+                     2:'hist-match',
+                     3:'hist-match-bw',
+                     4:'seamless',
+                     5:'raw'}
+
+        self.mode = mode_dict.get (mode, mode_dict[default_mode] )
+
+        if self.mode == 'raw':
+            mode = io.input_int ("Choose raw mode: (1) rgb, (2) rgb+mask (default), (3) mask only, (4) predicted only : ", 2)
+            self.raw_mode = {1:'rgb',
+                             2:'rgb-mask',
+                             3:'mask-only',
+                             4:'predicted-only'}.get (mode, 'rgb-mask')
+
+        if self.mode != 'raw':
+
+            if self.mode == 'seamless':
+                if io.input_bool("Seamless hist match? (y/n skip:n) : ", False):
+                    self.mode = 'seamless-hist-match'
+
+            if self.mode == 'hist-match' or self.mode == 'hist-match-bw':
+                self.masked_hist_match = io.input_bool("Masked hist match? 
(y/n skip:y) : ", True) + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': + self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) + + if force_mask_mode != -1: + self.mask_mode = force_mask_mode + else: + if face_type == FaceType.FULL: + self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst, (3) FAN-prd, (4) FAN-dst , (5) FAN-prd*FAN-dst (6) learned*FAN-prd*FAN-dst (?) help. Default - %d : " % (1) , 1, help_message="If you learned mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks."), 1, 6 ) + else: + self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst . Default - %d : " % (1) , 1), 1, 2 ) + + if self.mask_mode >= 3 and self.mask_mode <= 6: + self.fan_seg = None + + if self.mode != 'raw': + self.erode_mask_modifier = base_erode_mask_modifier + np.clip ( io.input_int ("Choose erode mask modifier [-200..200] (skip:%d) : " % (default_erode_mask_modifier), default_erode_mask_modifier), -200, 200) + self.blur_mask_modifier = base_blur_mask_modifier + np.clip ( io.input_int ("Choose blur mask modifier [-200..200] (skip:%d) : " % (default_blur_mask_modifier), default_blur_mask_modifier), -200, 200) + + self.output_face_scale = np.clip ( 1.0 + io.input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0)*0.01, 0.5, 1.5) + + if self.mode != 'raw': + self.color_transfer_mode = io.input_str ("Apply color transfer to predicted face? Choose mode ( rct/lct skip:None ) : ", None, ['rct','lct']) + + self.super_resolution = io.input_bool("Apply super resolution? (y/n ?:help skip:n) : ", False, help_message="Enhance details by applying DCSCN network.") + + if self.mode != 'raw': + self.final_image_color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) + self.alpha = io.input_bool("Export png with alpha channel? 
(y/n skip:n) : ", False) + + io.log_info ("") + + if self.super_resolution: + host_proc, dc_upscale = SubprocessFunctionCaller.make_pair( imagelib.DCSCN().upscale ) + self.dc_host = AntiPickler(host_proc) + self.dc_upscale = dc_upscale + else: + self.dc_host = None + + #overridable + def on_host_tick(self): + self.predictor_func_host.obj.process_messages() + + if self.dc_host is not None: + self.dc_host.obj.process_messages() + + #overridable + def on_cli_initialize(self): + if (self.mask_mode >= 3 and self.mask_mode <= 6) and self.fan_seg == None: + self.fan_seg = FANSegmentator(256, FaceType.toString( self.face_type ) ) + + #override + def cli_convert_face (self, img_bgr, img_face_landmarks, debug, **kwargs): + if debug: + debugs = [img_bgr.copy()] + + img_size = img_bgr.shape[1], img_bgr.shape[0] + + img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) + + output_size = self.predictor_input_size + if self.super_resolution: + output_size *= 2 + + face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type) + face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type, scale=self.output_face_scale) + + dst_face_bgr = cv2.warpAffine( img_bgr , face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) + dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) + + predictor_input_bgr = cv2.resize (dst_face_bgr, (self.predictor_input_size,self.predictor_input_size)) + + if self.predictor_masked: + prd_face_bgr, prd_face_mask_a_0 = self.predictor_func (predictor_input_bgr) + + prd_face_bgr = np.clip (prd_face_bgr, 0, 1.0 ) + prd_face_mask_a_0 = np.clip (prd_face_mask_a_0, 0.0, 1.0) + else: + predicted = self.predictor_func (predictor_input_bgr) + prd_face_bgr = np.clip (predicted, 0, 1.0 ) + prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size)) + + if self.super_resolution: + if debug: + tmp = cv2.resize (prd_face_bgr, (output_size,output_size), cv2.INTER_CUBIC) + debugs += [ np.clip( cv2.warpAffine( tmp, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + prd_face_bgr = self.dc_upscale(prd_face_bgr) + if debug: + debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + if self.predictor_masked: + prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) + else: + prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) + + if self.mask_mode == 2: #dst + prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) + elif self.mask_mode >= 3 and self.mask_mode <= 6: + + if self.mask_mode == 3 or self.mask_mode == 5 or self.mask_mode == 6: + prd_face_bgr_256 = cv2.resize (prd_face_bgr, (256,256) ) + prd_face_bgr_256_mask = self.fan_seg.extract( prd_face_bgr_256 ) + FAN_prd_face_mask_a_0 = cv2.resize (prd_face_bgr_256_mask, (output_size,output_size), cv2.INTER_CUBIC) + + if self.mask_mode == 4 or self.mask_mode == 5 or self.mask_mode == 6: + face_256_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, 256, face_type=FaceType.FULL) + dst_face_256_bgr = cv2.warpAffine(img_bgr, face_256_mat, (256, 256), flags=cv2.INTER_LANCZOS4 ) + 
dst_face_256_mask = self.fan_seg.extract( dst_face_256_bgr ) + FAN_dst_face_mask_a_0 = cv2.resize (dst_face_256_mask, (output_size,output_size), cv2.INTER_CUBIC) + + if self.mask_mode == 3: #FAN-prd + prd_face_mask_a_0 = FAN_prd_face_mask_a_0 + elif self.mask_mode == 4: #FAN-dst + prd_face_mask_a_0 = FAN_dst_face_mask_a_0 + elif self.mask_mode == 5: + prd_face_mask_a_0 = FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 + elif self.mask_mode == 6: + prd_face_mask_a_0 = prd_face_mask_a_0 * FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 + + prd_face_mask_a_0[ prd_face_mask_a_0 < 0.001 ] = 0.0 + + prd_face_mask_a = prd_face_mask_a_0[...,np.newaxis] + prd_face_mask_aaa = np.repeat (prd_face_mask_a, (3,), axis=-1) + + img_face_mask_aaa = cv2.warpAffine( prd_face_mask_aaa, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) + img_face_mask_aaa = np.clip (img_face_mask_aaa, 0.0, 1.0) + img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise + + if debug: + debugs += [img_face_mask_aaa.copy()] + + + out_img = img_bgr.copy() + + if self.mode == 'raw': + if self.raw_mode == 'rgb' or self.raw_mode == 'rgb-mask': + out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + + if self.raw_mode == 'rgb-mask': + out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 ) + + if self.raw_mode == 'mask-only': + out_img = img_face_mask_aaa + + if self.raw_mode == 'predicted-only': + out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(out_img.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + + else: + #averaging [lenx, leny, maskx, masky] by grayscale gradients of upscaled mask + ar = [] + for i in range(1, 10): + maxregion = np.argwhere( img_face_mask_aaa > i / 10.0 ) + if maxregion.size != 0: + miny,minx = maxregion.min(axis=0)[:2] + maxy,maxx = maxregion.max(axis=0)[:2] + lenx = maxx - minx + leny = maxy - miny + if min(lenx,leny) >= 4: + ar += [ [ lenx, leny] ] + + if len(ar) > 0: + lenx, leny = np.mean ( ar, axis=0 ) + lowest_len = min (lenx, leny) + if debug: + io.log_info ("lenx/leny:(%d/%d) " % (lenx, leny ) ) + io.log_info ("lowest_len = %f" % (lowest_len) ) + + if self.erode_mask_modifier != 0: + ero = int( lowest_len * ( 0.126 - lowest_len * 0.00004551365 ) * 0.01*self.erode_mask_modifier ) + if debug: + io.log_info ("erode_size = %d" % (ero) ) + if ero > 0: + img_face_mask_aaa = cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) + elif ero < 0: + img_face_mask_aaa = cv2.dilate(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) + + img_mask_blurry_aaa = img_face_mask_aaa + + if self.clip_hborder_mask_per > 0: #clip hborder before blur + prd_hborder_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=np.float32) + prd_border_size = int ( prd_hborder_rect_mask_a.shape[1] * self.clip_hborder_mask_per ) + prd_hborder_rect_mask_a[:,0:prd_border_size,:] = 0 + prd_hborder_rect_mask_a[:,-prd_border_size:,:] = 0 + prd_hborder_rect_mask_a = np.expand_dims(cv2.blur(prd_hborder_rect_mask_a, (prd_border_size, prd_border_size) ),-1) + + img_prd_hborder_rect_mask_a = cv2.warpAffine( prd_hborder_rect_mask_a, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) + img_prd_hborder_rect_mask_a = 
np.expand_dims (img_prd_hborder_rect_mask_a, -1)
+                img_mask_blurry_aaa *= img_prd_hborder_rect_mask_a
+                img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 )
+
+                if debug:
+                    debugs += [img_mask_blurry_aaa.copy()]
+
+            if self.blur_mask_modifier > 0:
+                blur = int( lowest_len * 0.10 * 0.01*self.blur_mask_modifier )
+                if debug:
+                    io.log_info ("blur_size = %d" % (blur) )
+                if blur > 0:
+                    img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) )
+
+            img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 )
+            face_mask_blurry_aaa = cv2.warpAffine( img_mask_blurry_aaa, face_mat, (output_size, output_size) )
+
+            if debug:
+                debugs += [img_mask_blurry_aaa.copy()]
+
+            if 'seamless' not in self.mode and self.color_transfer_mode is not None:
+                if self.color_transfer_mode == 'rct':
+                    if debug:
+                        debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ]
+
+                    prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (prd_face_bgr*255).astype(np.uint8), 0, 255),
+                                                                      np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255),
+                                                                      source_mask=prd_face_mask_a, target_mask=prd_face_mask_a)
+                    prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
+
+                    if debug:
+                        debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ]
+
+
+                elif self.color_transfer_mode == 'lct':
+                    if debug:
+                        debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ]
+
+                    prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr)
+                    prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0)
+
+                    if debug:
+                        debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ]
+
+            if self.mode == 'hist-match-bw':
+                prd_face_bgr = cv2.cvtColor(prd_face_bgr, cv2.COLOR_BGR2GRAY)
+                prd_face_bgr = np.repeat( np.expand_dims (prd_face_bgr, -1), (3,), -1 )
+
+            if self.mode == 'hist-match' or self.mode == 'hist-match-bw':
+                if debug:
+                    debugs += [ cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) ]
+
+                hist_mask_a = np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32)
+
+                if self.masked_hist_match:
+                    hist_mask_a *= prd_face_mask_a
+
+                white = (1.0-hist_mask_a)* np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32)
+
+                hist_match_1 = prd_face_bgr*hist_mask_a + white
+                hist_match_1[ hist_match_1 > 1.0 ] = 1.0
+
+                hist_match_2 = dst_face_bgr*hist_mask_a + white
+                hist_match_2[ hist_match_2 > 1.0 ] = 1.0
+
+                prd_face_bgr = imagelib.color_hist_match(hist_match_1, hist_match_2, self.hist_match_threshold )
+
+                #if self.masked_hist_match:
+                #    prd_face_bgr -= white
+
+            if self.mode == 'hist-match-bw':
+                prd_face_bgr = prd_face_bgr.astype(dtype=np.float32)
+
+            out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT )
+            out_img = np.clip(out_img, 0.0, 1.0)
+
+            if debug:
+                debugs += [out_img.copy()]
+
+            if self.mode == 'overlay':
+                pass
+
+            if 'seamless' in self.mode:
+                #mask 
used for cv2.seamlessClone + img_face_seamless_mask_a = None + img_face_mask_a = img_mask_blurry_aaa[...,0:1] + for i in range(1,10): + a = img_face_mask_a > i / 10.0 + if len(np.argwhere(a)) == 0: + continue + img_face_seamless_mask_a = img_mask_blurry_aaa[...,0:1].copy() + img_face_seamless_mask_a[a] = 1.0 + img_face_seamless_mask_a[img_face_seamless_mask_a <= i / 10.0] = 0.0 + break + + try: + #calc same bounding rect and center point as in cv2.seamlessClone to prevent jittering + l,t,w,h = cv2.boundingRect( (img_face_seamless_mask_a*255).astype(np.uint8) ) + s_maskx, s_masky = int(l+w/2), int(t+h/2) + + out_img = cv2.seamlessClone( (out_img*255).astype(np.uint8), (img_bgr*255).astype(np.uint8), (img_face_seamless_mask_a*255).astype(np.uint8), (s_maskx,s_masky) , cv2.NORMAL_CLONE ) + out_img = out_img.astype(dtype=np.float32) / 255.0 + except Exception as e: + #seamlessClone may fail in some cases + e_str = traceback.format_exc() + + if 'MemoryError' in e_str: + raise Exception("Seamless fail: " + e_str) #reraise MemoryError in order to reprocess this data by other processes + else: + print ("Seamless fail: " + e_str) + + if debug: + debugs += [out_img.copy()] + + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (out_img*img_mask_blurry_aaa) , 0, 1.0 ) + + if 'seamless' in self.mode and self.color_transfer_mode is not None: + out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) + + if self.color_transfer_mode == 'rct': + if debug: + debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + new_out_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (out_face_bgr*255).astype(np.uint8), 0, 255), + np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), + source_mask=face_mask_blurry_aaa, target_mask=face_mask_blurry_aaa) + new_out_face_bgr = np.clip( new_out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) + + if debug: + debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + + elif self.color_transfer_mode == 'lct': + if debug: + debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + new_out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr) + new_out_face_bgr = np.clip( new_out_face_bgr, 0.0, 1.0) + + if debug: + debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 ) + + if self.mode == 'seamless-hist-match': + out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) + new_out_face_bgr = imagelib.color_hist_match(out_face_bgr, dst_face_bgr, self.hist_match_threshold) + new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + 
(new_out*img_mask_blurry_aaa) , 0, 1.0 )
+
+            if self.final_image_color_degrade_power != 0:
+                if debug:
+                    debugs += [out_img.copy()]
+                out_img_reduced = imagelib.reduce_colors(out_img, 256)
+                if self.final_image_color_degrade_power == 100:
+                    out_img = out_img_reduced
+                else:
+                    alpha = self.final_image_color_degrade_power / 100.0
+                    out_img = (out_img*(1.0-alpha) + out_img_reduced*alpha)
+
+            if self.alpha:
+                out_img = np.concatenate ( [out_img, np.expand_dims (img_mask_blurry_aaa[:,:,0],-1)], -1 )
+
+        out_img = np.clip (out_img, 0.0, 1.0 )
+
+        if debug:
+            debugs += [out_img.copy()]
+
+        return debugs if debug else out_img
diff --git a/converters/__init__.py b/converters/__init__.py
index ef544cb..be5d9c1 100644
--- a/converters/__init__.py
+++ b/converters/__init__.py
@@ -1,4 +1,4 @@
-from .Converter import Converter
-from .ConverterMasked import ConverterMasked
-from .ConverterImage import ConverterImage
-from .ConverterAvatar import ConverterAvatar
+from .Converter import Converter
+from .ConverterMasked import ConverterMasked
+from .ConverterImage import ConverterImage
+from .ConverterAvatar import ConverterAvatar
diff --git a/doc/doc_build_and_repository_info.md b/doc/doc_build_and_repository_info.md
index 89a740e..cf9b5b1 100644
--- a/doc/doc_build_and_repository_info.md
+++ b/doc/doc_build_and_repository_info.md
@@ -1,5 +1,5 @@
-#### **CPU mode**
-
-It is possible to run from script for all stages using the `--cpu-only` flag. To run from script, install the separate dependencies for CPU mode using `pip -r requirements-cpu.txt`.
-
+#### **CPU mode**
+
+It is possible to run from script for all stages using the `--cpu-only` flag. To run from script, install the separate dependencies for CPU mode using `pip install -r requirements-cpu.txt`.
+
 Please note that extraction and training will take much longer without a GPU, and performance will suffer greatly. In particular, do not use the DLIB extractor in CPU mode; it is too slow to run without a GPU. Train only on 64px-resolution models like H64 or SAE (with low settings) and the lightweight encoder.
\ No newline at end of file
diff --git a/doc/doc_prebuilt_windows_app.md b/doc/doc_prebuilt_windows_app.md
index 5746f5a..0c0d37a 100644
--- a/doc/doc_prebuilt_windows_app.md
+++ b/doc/doc_prebuilt_windows_app.md
@@ -1,25 +1,25 @@
-### **Prebuilt Windows Releases**
-
-Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed. Prebuilt DeepFaceLab, including GPU and CPU versions, can be downloaded from
-
-[Google drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci)
-
-[Mega](https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg)
-
-Available builds:
-
-* DeepFaceLabCUDA9.2SSE - for NVIDIA cards up to GTX1080 and any 64-bit CPU
-
-* DeepFaceLabCUDA10.1AVX - for NVIDIA cards up to RTX and CPU with AVX instructions support
-
-* DeepFaceLabOpenCLSSE - for AMD/IntelHD cards and any 64-bit CPU
-
-#### Video tutorials using prebuilt windows app
-
-* [Basic workflow](https://www.youtube.com/watch?v=K98nTNjXkq8)
-
-* [Basic workflow (thanks @derpfakes)](https://www.youtube.com/watch?v=cVcyghhmQSA)
-
-* [How To Make DeepFakes With DeepFaceLab - An Amatuer's Guide](https://www.youtube.com/watch?v=wBax7_UWXvc)
-
+### **Prebuilt Windows Releases**
+
+Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed. 
Prebuilt DeepFaceLab, including GPU and CPU versions, can be downloaded from
+
+[Google drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci)
+
+[Mega](https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg)
+
+Available builds:
+
+* DeepFaceLabCUDA9.2SSE - for NVIDIA cards up to GTX1080 and any 64-bit CPU
+
+* DeepFaceLabCUDA10.1AVX - for NVIDIA cards up to RTX and CPU with AVX instructions support
+
+* DeepFaceLabOpenCLSSE - for AMD/IntelHD cards and any 64-bit CPU
+
+#### Video tutorials using the prebuilt Windows app
+
+* [Basic workflow](https://www.youtube.com/watch?v=K98nTNjXkq8)
+
+* [Basic workflow (thanks @derpfakes)](https://www.youtube.com/watch?v=cVcyghhmQSA)
+
+* [How To Make DeepFakes With DeepFaceLab - An Amatuer's Guide](https://www.youtube.com/watch?v=wBax7_UWXvc)
+
 * [Manual re-extract poorly aligned frames](https://www.youtube.com/watch?v=7z1ykVVCHhM)
\ No newline at end of file
diff --git a/doc/doc_ready_to_work_facesets.md b/doc/doc_ready_to_work_facesets.md
index fb3344f..13ada80 100644
--- a/doc/doc_ready_to_work_facesets.md
+++ b/doc/doc_ready_to_work_facesets.md
@@ -1,11 +1,11 @@
-### **Example Face Sets**:
-
-Faces sets for the following have been pre-extracted,
-
-- Nicolas Cage
-- Steve Jobs
-- Putin
-- Elon Musk
-- Harrison Ford
-
-[Download from Mega](https://mega.nz/#F!y1ERHDaL!PPwg01PQZk0FhWLVo5_MaQ)
+### **Example Face Sets**:
+
+Face sets for the following have been pre-extracted:
+
+- Nicolas Cage
+- Steve Jobs
+- Putin
+- Elon Musk
+- Harrison Ford
+
+[Download from Mega](https://mega.nz/#F!y1ERHDaL!PPwg01PQZk0FhWLVo5_MaQ)
diff --git a/doc/gallery/doc_gallery.md b/doc/gallery/doc_gallery.md
index 5ba780f..aac2d37 100644
--- a/doc/gallery/doc_gallery.md
+++ b/doc/gallery/doc_gallery.md
@@ -1,3 +1,3 @@
-![](1.jpg)
-
+![](1.jpg)
+
 ![](2.jpg)
\ No newline at end of file
diff --git a/facelib/DLIBExtractor.py b/facelib/DLIBExtractor.py
index a91164d..b8230ad 100644
--- a/facelib/DLIBExtractor.py
+++ b/facelib/DLIBExtractor.py
@@ -1,40 +1,40 @@
-import numpy as np
-import os
-import cv2
-
-from pathlib import Path
-
-class DLIBExtractor(object):
-    def __init__(self, dlib):
-        self.scale_to = 1850
-        #3100 eats ~1.687GB VRAM on 2GB 730 desktop card, but >4Gb on 6GB card,
-        #but 3100 doesnt work on 2GB 850M notebook card, I cant understand this behaviour
-        #1850 works on 2GB 850M notebook card, works faster than 3100, produces good result
-        self.dlib = dlib
-
-    def __enter__(self):
-        self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") )
-        self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 )
-        return self
-
-    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
-        del self.dlib_cnn_face_detector
-        return False #pass exception between __enter__ and __exit__ to outter level
-
-    def extract_from_bgr (self, input_image):
-        input_image = input_image[:,:,::-1].copy()
-        (h, w, ch) = input_image.shape
-
-        detected_faces = []
-        input_scale = self.scale_to / (w if w > h else h)
-        input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR)
-        detected_faces = self.dlib_cnn_face_detector(input_image, 0)
-
-        result = []
-        for d_rect in detected_faces:
-            if type(d_rect) == self.dlib.mmod_rectangle:
-                d_rect = d_rect.rect
-            left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom()
-            result.append ( (int(left/input_scale), int(top/input_scale), 
int(right/input_scale), int(bottom/input_scale)) ) - - return result +import numpy as np +import os +import cv2 + +from pathlib import Path + +class DLIBExtractor(object): + def __init__(self, dlib): + self.scale_to = 1850 + #3100 eats ~1.687GB VRAM on 2GB 730 desktop card, but >4Gb on 6GB card, + #but 3100 doesnt work on 2GB 850M notebook card, I cant understand this behaviour + #1850 works on 2GB 850M notebook card, works faster than 3100, produces good result + self.dlib = dlib + + def __enter__(self): + self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") ) + self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 ) + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + del self.dlib_cnn_face_detector + return False #pass exception between __enter__ and __exit__ to outter level + + def extract_from_bgr (self, input_image): + input_image = input_image[:,:,::-1].copy() + (h, w, ch) = input_image.shape + + detected_faces = [] + input_scale = self.scale_to / (w if w > h else h) + input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) + detected_faces = self.dlib_cnn_face_detector(input_image, 0) + + result = [] + for d_rect in detected_faces: + if type(d_rect) == self.dlib.mmod_rectangle: + d_rect = d_rect.rect + left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom() + result.append ( (int(left/input_scale), int(top/input_scale), int(right/input_scale), int(bottom/input_scale)) ) + + return result diff --git a/facelib/FANSegmentator.py b/facelib/FANSegmentator.py index e2ae705..d944aaa 100644 --- a/facelib/FANSegmentator.py +++ b/facelib/FANSegmentator.py @@ -1,139 +1,139 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -FANSegmentator is designed to exclude obstructions from faces such as hair, fingers, etc. 
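(For orientation -- mask modes 3-6 of ConverterMasked above drive this class -- here is a minimal usage sketch. The weights file and the input crop are assumptions for illustration: the pretrained FANSeg .h5 file must be present next to facelib, and `face_bgr` stands in for a 256x256 float32 BGR face crop in [0,1]:

    import cv2
    import numpy as np
    from facelib import FaceType, FANSegmentator

    face_bgr = np.zeros((256, 256, 3), dtype=np.float32)    # hypothetical crop

    fan_seg = FANSegmentator(256, FaceType.toString(FaceType.FULL))
    mask = fan_seg.extract(face_bgr)     # (256, 256, 1) float mask in [0, 1]
    mask = cv2.resize(mask, (128, 128), interpolation=cv2.INTER_CUBIC)

ConverterMasked resizes the returned mask to its output size in much the same way as the last line above.)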
- -Dataset used to train located in official DFL mega.nz folder -https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg - -using https://github.com/ternaus/TernausNet -TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation -""" - -class FANSegmentator(object): - VERSION = 1 - def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - - self.model = FANSegmentator.BuildModel(resolution, ngf=64) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.weights_path = weights_file_root / ('FANSeg_%d_%s.h5' % (resolution, face_type_str) ) - - if load_weights: - self.model.load_weights (str(self.weights_path)) - else: - if training: - try: - with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: - d = pickle.loads (f.read()) - - for i in [0,3,6,8,11,13,16,18]: - s = 'features.%d' % i - - self.model.get_layer (s).set_weights ( d[s] ) - except: - io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") - - if training: - #self.model.compile(loss='mse', optimizer=Adam(tf_cpu_mode=2)) - self.model.compile(loss='binary_crossentropy', optimizer=Adam(tf_cpu_mode=2) ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.model.save_weights (str(self.weights_path)) - - def train_on_batch(self, inp, outp): - return self.model.train_on_batch(inp, outp) - - def extract (self, input_image, is_input_tanh=False): - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
- - result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) - result[result < 0.1] = 0 #get rid of noise - - if input_shape_len == 3: - result = result[0] - - return result - - @staticmethod - def BuildModel ( resolution, ngf=64, norm='', act='lrelu'): - exec( nnlib.import_all(), locals(), globals() ) - inp = Input ( (resolution,resolution,3) ) - x = inp - x = FANSegmentator.Flow(ngf=ngf, norm=norm, act=act)(x) - model = Model(inp,x) - return model - - @staticmethod - def Flow(ngf=64, num_downs=4, norm='', act='lrelu'): - exec( nnlib.import_all(), locals(), globals() ) - - def func(input): - x = input - - x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) - x = MaxPooling2D()(x) - - x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) - x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) - x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) - x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same')(x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x4]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x3]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x2]) - x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x1]) - x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x0]) - x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu') (x) - - return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid')(x) - - - return func +import os +import pickle +from functools import partial +from pathlib import Path + +import cv2 +import numpy as np + +from interact import interact as io +from nnlib import nnlib + +""" +FANSegmentator is designed to exclude obstructions from faces such as hair, fingers, etc. 
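+It outputs a single-channel mask with values in [0,1]; extract() zeroes values below 0.1 to suppress prediction noise before callers resize the mask or multiply it into other masks.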
+
+The dataset used for training is located in the official DFL mega.nz folder:
+https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg
+
+The architecture is based on https://github.com/ternaus/TernausNet
+(TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation).
+"""
+
+class FANSegmentator(object):
+    VERSION = 1
+    def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False):
+        exec( nnlib.import_all(), locals(), globals() )
+
+        self.model = FANSegmentator.BuildModel(resolution, ngf=64)
+
+        if weights_file_root is not None:
+            weights_file_root = Path(weights_file_root)
+        else:
+            weights_file_root = Path(__file__).parent
+
+        self.weights_path = weights_file_root / ('FANSeg_%d_%s.h5' % (resolution, face_type_str) )
+
+        if load_weights:
+            self.model.load_weights (str(self.weights_path))
+        else:
+            if training:
+                try:
+                    with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f:
+                        d = pickle.loads (f.read())
+
+                    for i in [0,3,6,8,11,13,16,18]:
+                        s = 'features.%d' % i
+
+                        self.model.get_layer (s).set_weights ( d[s] )
+                except:
+                    io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy")
+
+        if training:
+            #self.model.compile(loss='mse', optimizer=Adam(tf_cpu_mode=2))
+            self.model.compile(loss='binary_crossentropy', optimizer=Adam(tf_cpu_mode=2) )
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
+        return False #pass exception between __enter__ and __exit__ to outer level
+
+    def save_weights(self):
+        self.model.save_weights (str(self.weights_path))
+
+    def train_on_batch(self, inp, outp):
+        return self.model.train_on_batch(inp, outp)
+
+    def extract (self, input_image, is_input_tanh=False):
+        input_shape_len = len(input_image.shape)
+        if input_shape_len == 3:
+            input_image = input_image[np.newaxis,...]
+ + result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) + result[result < 0.1] = 0 #get rid of noise + + if input_shape_len == 3: + result = result[0] + + return result + + @staticmethod + def BuildModel ( resolution, ngf=64, norm='', act='lrelu'): + exec( nnlib.import_all(), locals(), globals() ) + inp = Input ( (resolution,resolution,3) ) + x = inp + x = FANSegmentator.Flow(ngf=ngf, norm=norm, act=act)(x) + model = Model(inp,x) + return model + + @staticmethod + def Flow(ngf=64, num_downs=4, norm='', act='lrelu'): + exec( nnlib.import_all(), locals(), globals() ) + + def func(input): + x = input + + x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) + x = MaxPooling2D()(x) + + x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) + x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) + x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) + x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same')(x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x4]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x3]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x2]) + x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x1]) + x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x0]) + x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu') (x) + + return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid')(x) + + + return func diff --git a/facelib/FaceType.py b/facelib/FaceType.py index f0d5530..5cd1e8f 100644 --- a/facelib/FaceType.py +++ b/facelib/FaceType.py @@ -1,33 +1,33 @@ -from enum import IntEnum - -class FaceType(IntEnum): - HALF = 0, - FULL = 1, - HEAD = 2, - AVATAR = 3, #centered nose only - MARK_ONLY = 4, #no align at all, just embedded faceinfo - QTY = 5 - - @staticmethod - def fromString (s): - r = from_string_dict.get (s.lower()) - if r is None: - raise Exception ('FaceType.fromString value error') - return r - - @staticmethod - def toString (face_type): - return to_string_list[face_type] - -from_string_dict = {'half_face': FaceType.HALF, - 'full_face': FaceType.FULL, - 'head' : FaceType.HEAD, - 'avatar' : FaceType.AVATAR, - 'mark_only' : FaceType.MARK_ONLY, - } -to_string_list = [ 'half_face', - 'full_face', - 
'head', - 'avatar', - 'mark_only' - ] +from enum import IntEnum + +class FaceType(IntEnum): + HALF = 0, + FULL = 1, + HEAD = 2, + AVATAR = 3, #centered nose only + MARK_ONLY = 4, #no align at all, just embedded faceinfo + QTY = 5 + + @staticmethod + def fromString (s): + r = from_string_dict.get (s.lower()) + if r is None: + raise Exception ('FaceType.fromString value error') + return r + + @staticmethod + def toString (face_type): + return to_string_list[face_type] + +from_string_dict = {'half_face': FaceType.HALF, + 'full_face': FaceType.FULL, + 'head' : FaceType.HEAD, + 'avatar' : FaceType.AVATAR, + 'mark_only' : FaceType.MARK_ONLY, + } +to_string_list = [ 'half_face', + 'full_face', + 'head', + 'avatar', + 'mark_only' + ] diff --git a/facelib/LandmarksExtractor.py b/facelib/LandmarksExtractor.py index c9ca49d..1f37814 100644 --- a/facelib/LandmarksExtractor.py +++ b/facelib/LandmarksExtractor.py @@ -1,120 +1,120 @@ -import traceback -import numpy as np -import os -import cv2 -from pathlib import Path -from facelib import FaceType -from facelib import LandmarksProcessor - -class LandmarksExtractor(object): - def __init__ (self, keras): - self.keras = keras - K = self.keras.backend - - def __enter__(self): - keras_model_path = Path(__file__).parent / "2DFAN-4.h5" - if not keras_model_path.exists(): - return None - - self.keras_model = self.keras.models.load_model (str(keras_model_path)) - - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - del self.keras_model - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, rects, second_pass_extractor=None, is_bgr=True): - if len(rects) == 0: - return [] - - if is_bgr: - input_image = input_image[:,:,::-1] - is_bgr = False - - (h, w, ch) = input_image.shape - - landmarks = [] - for (left, top, right, bottom) in rects: - try: - center = np.array( [ (left + right) / 2.0, (top + bottom) / 2.0] ) - scale = (right - left + bottom - top) / 195.0 - - image = self.crop(input_image, center, scale).astype(np.float32) - image = np.expand_dims(image, 0) - - predicted = self.keras_model.predict (image).transpose (0,3,1,2) - - pts_img = self.get_pts_from_predict ( predicted[-1], center, scale) - landmarks.append (pts_img) - except: - landmarks.append (None) - - if second_pass_extractor is not None: - for i in range(len(landmarks)): - try: - lmrks = landmarks[i] - if lmrks is None: - continue - - image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL) - face_image = cv2.warpAffine(input_image, image_to_face_mat, (256, 256), cv2.INTER_CUBIC ) - - rects2 = second_pass_extractor.extract(face_image, is_bgr=is_bgr) - if len(rects2) != 1: #dont do second pass if faces != 1 detected in cropped image - continue - - lmrks2 = self.extract (face_image, [ rects2[0] ], is_bgr=is_bgr)[0] - source_lmrks2 = LandmarksProcessor.transform_points (lmrks2, image_to_face_mat, True) - landmarks[i] = source_lmrks2 - except: - continue - - return landmarks - - def transform(self, point, center, scale, resolution): - pt = np.array ( [point[0], point[1], 1.0] ) - h = 200.0 * scale - m = np.eye(3) - m[0,0] = resolution / h - m[1,1] = resolution / h - m[0,2] = resolution * ( -center[0] / h + 0.5 ) - m[1,2] = resolution * ( -center[1] / h + 0.5 ) - m = np.linalg.inv(m) - return np.matmul (m, pt)[0:2] - - def crop(self, image, center, scale, resolution=256.0): - ul = self.transform([1, 1], center, scale, resolution).astype( np.int ) - br = self.transform([resolution, 
resolution], center, scale, resolution).astype( np.int ) - - if image.ndim > 2: - newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) - newImg = np.zeros(newDim, dtype=np.uint8) - else: - newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) - newImg = np.zeros(newDim, dtype=np.uint8) - ht = image.shape[0] - wd = image.shape[1] - newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) - newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) - oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) - oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) - newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] - - newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR) - return newImg - - def get_pts_from_predict(self, a, center, scale): - b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) ) - c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float) - c[:,0] %= a.shape[2] - c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] ) - - for i in range(a.shape[0]): - pX, pY = int(c[i,0]), int(c[i,1]) - if pX > 0 and pX < 63 and pY > 0 and pY < 63: - diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] ) - c[i] += np.sign(diff)*0.25 - - c += 0.5 - return np.array( [ self.transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ] ) +import traceback +import numpy as np +import os +import cv2 +from pathlib import Path +from facelib import FaceType +from facelib import LandmarksProcessor + +class LandmarksExtractor(object): + def __init__ (self, keras): + self.keras = keras + K = self.keras.backend + + def __enter__(self): + keras_model_path = Path(__file__).parent / "2DFAN-4.h5" + if not keras_model_path.exists(): + return None + + self.keras_model = self.keras.models.load_model (str(keras_model_path)) + + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + del self.keras_model + return False #pass exception between __enter__ and __exit__ to outter level + + def extract (self, input_image, rects, second_pass_extractor=None, is_bgr=True): + if len(rects) == 0: + return [] + + if is_bgr: + input_image = input_image[:,:,::-1] + is_bgr = False + + (h, w, ch) = input_image.shape + + landmarks = [] + for (left, top, right, bottom) in rects: + try: + center = np.array( [ (left + right) / 2.0, (top + bottom) / 2.0] ) + scale = (right - left + bottom - top) / 195.0 + + image = self.crop(input_image, center, scale).astype(np.float32) + image = np.expand_dims(image, 0) + + predicted = self.keras_model.predict (image).transpose (0,3,1,2) + + pts_img = self.get_pts_from_predict ( predicted[-1], center, scale) + landmarks.append (pts_img) + except: + landmarks.append (None) + + if second_pass_extractor is not None: + for i in range(len(landmarks)): + try: + lmrks = landmarks[i] + if lmrks is None: + continue + + image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL) + face_image = cv2.warpAffine(input_image, image_to_face_mat, (256, 256), cv2.INTER_CUBIC ) + + rects2 = second_pass_extractor.extract(face_image, is_bgr=is_bgr) + if len(rects2) != 1: #dont do second pass if faces != 1 detected in cropped image + continue + + lmrks2 = self.extract (face_image, [ rects2[0] ], is_bgr=is_bgr)[0] + source_lmrks2 = LandmarksProcessor.transform_points (lmrks2, image_to_face_mat, 
True) + landmarks[i] = source_lmrks2 + except: + continue + + return landmarks + + def transform(self, point, center, scale, resolution): + pt = np.array ( [point[0], point[1], 1.0] ) + h = 200.0 * scale + m = np.eye(3) + m[0,0] = resolution / h + m[1,1] = resolution / h + m[0,2] = resolution * ( -center[0] / h + 0.5 ) + m[1,2] = resolution * ( -center[1] / h + 0.5 ) + m = np.linalg.inv(m) + return np.matmul (m, pt)[0:2] + + def crop(self, image, center, scale, resolution=256.0): + ul = self.transform([1, 1], center, scale, resolution).astype( np.int ) + br = self.transform([resolution, resolution], center, scale, resolution).astype( np.int ) + + if image.ndim > 2: + newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) + newImg = np.zeros(newDim, dtype=np.uint8) + else: + newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) + newImg = np.zeros(newDim, dtype=np.uint8) + ht = image.shape[0] + wd = image.shape[1] + newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) + newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) + oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) + oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) + newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] + + newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR) + return newImg + + def get_pts_from_predict(self, a, center, scale): + b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) ) + c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float) + c[:,0] %= a.shape[2] + c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] ) + + for i in range(a.shape[0]): + pX, pY = int(c[i,0]), int(c[i,1]) + if pX > 0 and pX < 63 and pY > 0 and pY < 63: + diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] ) + c[i] += np.sign(diff)*0.25 + + c += 0.5 + return np.array( [ self.transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ] ) diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py index 66c29dd..06d895d 100644 --- a/facelib/LandmarksProcessor.py +++ b/facelib/LandmarksProcessor.py @@ -1,386 +1,386 @@ -import colorsys -import cv2 -import numpy as np -from enum import IntEnum -import mathlib -import imagelib -from imagelib import IEPolys -from mathlib.umeyama import umeyama -from facelib import FaceType -import math - -mean_face_x = np.array([ -0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, -0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, -0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, -0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, -0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, -0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, -0.553364, 0.490127, 0.42689 ]) - -mean_face_y = np.array([ -0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, -0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, -0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, -0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, -0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, -0.902192, 0.909281, 0.902192, 0.864805, 
0.784792, 0.778746, 0.785343, 0.778746, -0.784792, 0.824182, 0.831803, 0.824182 ]) - -landmarks_2D = np.stack( [ mean_face_x, mean_face_y ], axis=1 ) - -# 68 point landmark definitions -landmarks_68_pt = { "mouth": (48,68), - "right_eyebrow": (17, 22), - "left_eyebrow": (22, 27), - "right_eye": (36, 42), - "left_eye": (42, 48), - "nose": (27, 36), # missed one point - "jaw": (0, 17) } - - -landmarks_68_3D = np.array( [ -[-73.393523 , -29.801432 , 47.667532 ], -[-72.775014 , -10.949766 , 45.909403 ], -[-70.533638 , 7.929818 , 44.842580 ], -[-66.850058 , 26.074280 , 43.141114 ], -[-59.790187 , 42.564390 , 38.635298 ], -[-48.368973 , 56.481080 , 30.750622 ], -[-34.121101 , 67.246992 , 18.456453 ], -[-17.875411 , 75.056892 , 3.609035 ], -[0.098749 , 77.061286 , -0.881698 ], -[17.477031 , 74.758448 , 5.181201 ], -[32.648966 , 66.929021 , 19.176563 ], -[46.372358 , 56.311389 , 30.770570 ], -[57.343480 , 42.419126 , 37.628629 ], -[64.388482 , 25.455880 , 40.886309 ], -[68.212038 , 6.990805 , 42.281449 ], -[70.486405 , -11.666193 , 44.142567 ], -[71.375822 , -30.365191 , 47.140426 ], -[-61.119406 , -49.361602 , 14.254422 ], -[-51.287588 , -58.769795 , 7.268147 ], -[-37.804800 , -61.996155 , 0.442051 ], -[-24.022754 , -61.033399 , -6.606501 ], -[-11.635713 , -56.686759 , -11.967398 ], -[12.056636 , -57.391033 , -12.051204 ], -[25.106256 , -61.902186 , -7.315098 ], -[38.338588 , -62.777713 , -1.022953 ], -[51.191007 , -59.302347 , 5.349435 ], -[60.053851 , -50.190255 , 11.615746 ], -[0.653940 , -42.193790 , -13.380835 ], -[0.804809 , -30.993721 , -21.150853 ], -[0.992204 , -19.944596 , -29.284036 ], -[1.226783 , -8.414541 , -36.948060 ], -[-14.772472 , 2.598255 , -20.132003 ], -[-7.180239 , 4.751589 , -23.536684 ], -[0.555920 , 6.562900 , -25.944448 ], -[8.272499 , 4.661005 , -23.695741 ], -[15.214351 , 2.643046 , -20.858157 ], -[-46.047290 , -37.471411 , 7.037989 ], -[-37.674688 , -42.730510 , 3.021217 ], -[-27.883856 , -42.711517 , 1.353629 ], -[-19.648268 , -36.754742 , -0.111088 ], -[-28.272965 , -35.134493 , -0.147273 ], -[-38.082418 , -34.919043 , 1.476612 ], -[19.265868 , -37.032306 , -0.665746 ], -[27.894191 , -43.342445 , 0.247660 ], -[37.437529 , -43.110822 , 1.696435 ], -[45.170805 , -38.086515 , 4.894163 ], -[38.196454 , -35.532024 , 0.282961 ], -[28.764989 , -35.484289 , -1.172675 ], -[-28.916267 , 28.612716 , -2.240310 ], -[-17.533194 , 22.172187 , -15.934335 ], -[-6.684590 , 19.029051 , -22.611355 ], -[0.381001 , 20.721118 , -23.748437 ], -[8.375443 , 19.035460 , -22.721995 ], -[18.876618 , 22.394109 , -15.610679 ], -[28.794412 , 28.079924 , -3.217393 ], -[19.057574 , 36.298248 , -14.987997 ], -[8.956375 , 39.634575 , -22.554245 ], -[0.381549 , 40.395647 , -23.591626 ], -[-7.428895 , 39.836405 , -22.406106 ], -[-18.160634 , 36.677899 , -15.121907 ], -[-24.377490 , 28.677771 , -4.785684 ], -[-6.897633 , 25.475976 , -20.893742 ], -[0.340663 , 26.014269 , -22.220479 ], -[8.444722 , 25.326198 , -21.025520 ], -[24.474473 , 28.323008 , -5.712776 ], -[8.449166 , 30.596216 , -20.671489 ], -[0.205322 , 31.408738 , -21.903670 ], -[-7.198266 , 30.844876 , -20.328022 ] ], dtype=np.float32) - -def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): - if not isinstance(image_landmarks, np.ndarray): - image_landmarks = np.array (image_landmarks) - - if face_type == FaceType.AVATAR: - centroid = np.mean (image_landmarks, axis=0) - - mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] - a, c = mat[0,0], mat[1,0] - scale = math.sqrt((a * a) + (c * c)) - - padding = 
(output_size / 64) * 32 - - mat = np.eye ( 2,3 ) - mat[0,2] = -centroid[0] - mat[1,2] = -centroid[1] - mat = mat * scale * (output_size / 3) - mat[:,2] += output_size / 2 - else: - if face_type == FaceType.HALF: - padding = 0 - elif face_type == FaceType.FULL: - padding = (output_size / 64) * 12 - elif face_type == FaceType.HEAD: - padding = (output_size / 64) * 24 - else: - raise ValueError ('wrong face_type: ', face_type) - - mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] - mat = mat * (output_size - 2 * padding) - mat[:,2] += padding - mat *= (1 / scale) - mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 ) - - return mat - -def transform_points(points, mat, invert=False): - if invert: - mat = cv2.invertAffineTransform (mat) - points = np.expand_dims(points, axis=1) - points = cv2.transform(points, mat, points.shape) - points = np.squeeze(points) - return points - - -def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None): - if len(image_landmarks) != 68: - raise Exception('get_image_hull_mask works only with 68 landmarks') - int_lmrks = np.array(image_landmarks.copy(), dtype=np.int) - - hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[0:9], - # int_lmrks[17:18]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[8:17], - # int_lmrks[26:27]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[17:20], - # int_lmrks[8:9]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[24:27], - # int_lmrks[8:9]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[19:25], - # int_lmrks[8:9], - # ))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[17:22], - # int_lmrks[27:28], - # int_lmrks[31:36], - # int_lmrks[8:9] - # ))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[22:27], - # int_lmrks[27:28], - # int_lmrks[31:36], - # int_lmrks[8:9] - # ))) , (1,) ) - - # #nose - # cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) ) - ml_pnt = (int_lmrks[36] + int_lmrks[0]) // 2 - mr_pnt = (int_lmrks[16] + int_lmrks[45]) // 2 - - # mid points between the mid points and eye - ql_pnt = (int_lmrks[36] + ml_pnt) // 2 - qr_pnt = (int_lmrks[45] + mr_pnt) // 2 - - # Top of the eye arrays - bot_l = np.array((ql_pnt, int_lmrks[36], int_lmrks[37], int_lmrks[38], int_lmrks[39])) - bot_r = np.array((int_lmrks[42], int_lmrks[43], int_lmrks[44], int_lmrks[45], qr_pnt)) - - # Eyebrow arrays - top_l = int_lmrks[17:22] - top_r = int_lmrks[22:27] - - # Adjust eyebrow arrays - int_lmrks[17:22] = top_l + ((top_l - bot_l) // 2) - int_lmrks[22:27] = top_r + ((top_r - bot_r) // 2) - - r_jaw = (int_lmrks[0:9], int_lmrks[17:18]) - l_jaw = (int_lmrks[8:17], int_lmrks[26:27]) - r_cheek = (int_lmrks[17:20], int_lmrks[8:9]) - l_cheek = (int_lmrks[24:27], int_lmrks[8:9]) - nose_ridge = (int_lmrks[19:25], int_lmrks[8:9],) - r_eye = (int_lmrks[17:22], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) - l_eye = (int_lmrks[22:27], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) - nose = (int_lmrks[27:31], int_lmrks[31:36]) - parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] - - for item in parts: - merged = np.concatenate(item) - cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), 255.) 
# pylint: disable=no-member - - if ie_polys is not None: - ie_polys.overlay_mask(hull_mask) - - return hull_mask - -def get_image_eye_mask (image_shape, image_landmarks): - if len(image_landmarks) != 68: - raise Exception('get_image_eye_mask works only with 68 landmarks') - - hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) - - cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[36:42]), (1,) ) - cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[42:48]), (1,) ) - - return hull_mask - -def blur_image_hull_mask (hull_mask): - - maxregion = np.argwhere(hull_mask==1.0) - miny,minx = maxregion.min(axis=0)[:2] - maxy,maxx = maxregion.max(axis=0)[:2] - lenx = maxx - minx; - leny = maxy - miny; - masky = int(minx+(lenx//2)) - maskx = int(miny+(leny//2)) - lowest_len = min (lenx, leny) - ero = int( lowest_len * 0.085 ) - blur = int( lowest_len * 0.10 ) - - hull_mask = cv2.erode(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) - hull_mask = cv2.blur(hull_mask, (blur, blur) ) - hull_mask = np.expand_dims (hull_mask,-1) - - return hull_mask - -mirror_idxs = [ - [0,16], - [1,15], - [2,14], - [3,13], - [4,12], - [5,11], - [6,10], - [7,9], - - [17,26], - [18,25], - [19,24], - [20,23], - [21,22], - - [36,45], - [37,44], - [38,43], - [39,42], - [40,47], - [41,46], - - [31,35], - [32,34], - - [50,52], - [49,53], - [48,54], - [59,55], - [58,56], - [67,65], - [60,64], - [61,63] ] - -def mirror_landmarks (landmarks, val): - result = landmarks.copy() - - for idx in mirror_idxs: - result [ idx ] = result [ idx[::-1] ] - - result[:,0] = val - result[:,0] - 1 - return result - -def draw_landmarks (image, image_landmarks, color=(0,255,0), transparent_mask=False, ie_polys=None): - if len(image_landmarks) != 68: - raise Exception('get_image_eye_mask works only with 68 landmarks') - - int_lmrks = np.array(image_landmarks, dtype=np.int) - - jaw = int_lmrks[slice(*landmarks_68_pt["jaw"])] - right_eyebrow = int_lmrks[slice(*landmarks_68_pt["right_eyebrow"])] - left_eyebrow = int_lmrks[slice(*landmarks_68_pt["left_eyebrow"])] - mouth = int_lmrks[slice(*landmarks_68_pt["mouth"])] - right_eye = int_lmrks[slice(*landmarks_68_pt["right_eye"])] - left_eye = int_lmrks[slice(*landmarks_68_pt["left_eye"])] - nose = int_lmrks[slice(*landmarks_68_pt["nose"])] - - # open shapes - cv2.polylines(image, tuple(np.array([v]) for v in ( right_eyebrow, jaw, left_eyebrow, np.concatenate((nose, [nose[-6]])) )), - False, color, lineType=cv2.LINE_AA) - # closed shapes - cv2.polylines(image, tuple(np.array([v]) for v in (right_eye, left_eye, mouth)), - True, color, lineType=cv2.LINE_AA) - # the rest of the cicles - for x, y in np.concatenate((right_eyebrow, left_eyebrow, mouth, right_eye, left_eye, nose), axis=0): - cv2.circle(image, (x, y), 1, color, 1, lineType=cv2.LINE_AA) - # jaw big circles - for x, y in jaw: - cv2.circle(image, (x, y), 2, color, lineType=cv2.LINE_AA) - - if transparent_mask: - mask = get_image_hull_mask (image.shape, image_landmarks, ie_polys) - image[...] = ( image * (1-mask) + image * mask / 2 )[...] 
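(Aside: blur_image_hull_mask above feathers a binary hull mask by eroding it and then box-blurring it, with both kernel sizes taken as fractions of the smaller extent of the mask's bounding box. A minimal self-contained sketch of that idea -- the rectangle is a hypothetical stand-in for a real hull:

    import cv2
    import numpy as np

    mask = np.zeros((256, 256), dtype=np.float32)
    cv2.rectangle(mask, (64, 64), (192, 192), 1.0, -1)    # hypothetical hull

    lowest_len = 128.0                 # smaller side of the filled region
    ero  = int(lowest_len * 0.085)     # same factors as blur_image_hull_mask
    blur = int(lowest_len * 0.10)

    mask = cv2.erode(mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ero, ero)), iterations=1)
    mask = cv2.blur(mask, (blur, blur))
    mask = mask[..., np.newaxis]       # restore the trailing channel axis

Eroding before blurring keeps the feathered edge inside the original hull, so a face blended with this mask does not spill past the landmark outline.)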
- -def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, transparent_mask=False, ie_polys=None, landmarks_color=(0,255,0) ): - draw_landmarks(image, image_landmarks, color=landmarks_color, transparent_mask=transparent_mask, ie_polys=ie_polys) - imagelib.draw_rect (image, rect, (255,0,0), 2 ) - - image_to_face_mat = get_transform_mat (image_landmarks, face_size, face_type) - points = transform_points ( [ (0,0), (0,face_size-1), (face_size-1, face_size-1), (face_size-1,0) ], image_to_face_mat, True) - imagelib.draw_polygon (image, points, (0,0,255), 2) - -def calc_face_pitch(landmarks): - if not isinstance(landmarks, np.ndarray): - landmarks = np.array (landmarks) - t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0 - b = landmarks[8][1] - return float(b-t) - -def calc_face_yaw(landmarks): - if not isinstance(landmarks, np.ndarray): - landmarks = np.array (landmarks) - l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0 - r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 - return float(r-l) - -#returns pitch,yaw,roll [-1...+1] -def estimate_pitch_yaw_roll(aligned_256px_landmarks): - shape = (256,256) - focal_length = shape[1] - camera_center = (shape[1] / 2, shape[0] / 2) - camera_matrix = np.array( - [[focal_length, 0, camera_center[0]], - [0, focal_length, camera_center[1]], - [0, 0, 1]], dtype=np.float32) - - (_, rotation_vector, translation_vector) = cv2.solvePnP( - landmarks_68_3D, - aligned_256px_landmarks.astype(np.float32), - camera_matrix, - np.zeros((4, 1)) ) - - pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) - pitch = np.clip ( pitch/1.30, -1.0, 1.0 ) - yaw = np.clip ( yaw / 1.11, -1.0, 1.0 ) - roll = np.clip ( roll/3.15, -1.0, 1.0 ) - return -pitch, yaw, roll +import colorsys +import cv2 +import numpy as np +from enum import IntEnum +import mathlib +import imagelib +from imagelib import IEPolys +from mathlib.umeyama import umeyama +from facelib import FaceType +import math + +mean_face_x = np.array([ +0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, +0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, +0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, +0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, +0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, +0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, +0.553364, 0.490127, 0.42689 ]) + +mean_face_y = np.array([ +0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, +0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, +0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, +0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, +0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, +0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, +0.784792, 0.824182, 0.831803, 0.824182 ]) + +landmarks_2D = np.stack( [ mean_face_x, mean_face_y ], axis=1 ) + +# 68 point landmark definitions +landmarks_68_pt = { "mouth": (48,68), + "right_eyebrow": (17, 22), + "left_eyebrow": (22, 27), + "right_eye": (36, 42), + "left_eye": (42, 48), + "nose": (27, 36), # missed one point + 
"jaw": (0, 17) } + + +landmarks_68_3D = np.array( [ +[-73.393523 , -29.801432 , 47.667532 ], +[-72.775014 , -10.949766 , 45.909403 ], +[-70.533638 , 7.929818 , 44.842580 ], +[-66.850058 , 26.074280 , 43.141114 ], +[-59.790187 , 42.564390 , 38.635298 ], +[-48.368973 , 56.481080 , 30.750622 ], +[-34.121101 , 67.246992 , 18.456453 ], +[-17.875411 , 75.056892 , 3.609035 ], +[0.098749 , 77.061286 , -0.881698 ], +[17.477031 , 74.758448 , 5.181201 ], +[32.648966 , 66.929021 , 19.176563 ], +[46.372358 , 56.311389 , 30.770570 ], +[57.343480 , 42.419126 , 37.628629 ], +[64.388482 , 25.455880 , 40.886309 ], +[68.212038 , 6.990805 , 42.281449 ], +[70.486405 , -11.666193 , 44.142567 ], +[71.375822 , -30.365191 , 47.140426 ], +[-61.119406 , -49.361602 , 14.254422 ], +[-51.287588 , -58.769795 , 7.268147 ], +[-37.804800 , -61.996155 , 0.442051 ], +[-24.022754 , -61.033399 , -6.606501 ], +[-11.635713 , -56.686759 , -11.967398 ], +[12.056636 , -57.391033 , -12.051204 ], +[25.106256 , -61.902186 , -7.315098 ], +[38.338588 , -62.777713 , -1.022953 ], +[51.191007 , -59.302347 , 5.349435 ], +[60.053851 , -50.190255 , 11.615746 ], +[0.653940 , -42.193790 , -13.380835 ], +[0.804809 , -30.993721 , -21.150853 ], +[0.992204 , -19.944596 , -29.284036 ], +[1.226783 , -8.414541 , -36.948060 ], +[-14.772472 , 2.598255 , -20.132003 ], +[-7.180239 , 4.751589 , -23.536684 ], +[0.555920 , 6.562900 , -25.944448 ], +[8.272499 , 4.661005 , -23.695741 ], +[15.214351 , 2.643046 , -20.858157 ], +[-46.047290 , -37.471411 , 7.037989 ], +[-37.674688 , -42.730510 , 3.021217 ], +[-27.883856 , -42.711517 , 1.353629 ], +[-19.648268 , -36.754742 , -0.111088 ], +[-28.272965 , -35.134493 , -0.147273 ], +[-38.082418 , -34.919043 , 1.476612 ], +[19.265868 , -37.032306 , -0.665746 ], +[27.894191 , -43.342445 , 0.247660 ], +[37.437529 , -43.110822 , 1.696435 ], +[45.170805 , -38.086515 , 4.894163 ], +[38.196454 , -35.532024 , 0.282961 ], +[28.764989 , -35.484289 , -1.172675 ], +[-28.916267 , 28.612716 , -2.240310 ], +[-17.533194 , 22.172187 , -15.934335 ], +[-6.684590 , 19.029051 , -22.611355 ], +[0.381001 , 20.721118 , -23.748437 ], +[8.375443 , 19.035460 , -22.721995 ], +[18.876618 , 22.394109 , -15.610679 ], +[28.794412 , 28.079924 , -3.217393 ], +[19.057574 , 36.298248 , -14.987997 ], +[8.956375 , 39.634575 , -22.554245 ], +[0.381549 , 40.395647 , -23.591626 ], +[-7.428895 , 39.836405 , -22.406106 ], +[-18.160634 , 36.677899 , -15.121907 ], +[-24.377490 , 28.677771 , -4.785684 ], +[-6.897633 , 25.475976 , -20.893742 ], +[0.340663 , 26.014269 , -22.220479 ], +[8.444722 , 25.326198 , -21.025520 ], +[24.474473 , 28.323008 , -5.712776 ], +[8.449166 , 30.596216 , -20.671489 ], +[0.205322 , 31.408738 , -21.903670 ], +[-7.198266 , 30.844876 , -20.328022 ] ], dtype=np.float32) + +def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): + if not isinstance(image_landmarks, np.ndarray): + image_landmarks = np.array (image_landmarks) + + if face_type == FaceType.AVATAR: + centroid = np.mean (image_landmarks, axis=0) + + mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] + a, c = mat[0,0], mat[1,0] + scale = math.sqrt((a * a) + (c * c)) + + padding = (output_size / 64) * 32 + + mat = np.eye ( 2,3 ) + mat[0,2] = -centroid[0] + mat[1,2] = -centroid[1] + mat = mat * scale * (output_size / 3) + mat[:,2] += output_size / 2 + else: + if face_type == FaceType.HALF: + padding = 0 + elif face_type == FaceType.FULL: + padding = (output_size / 64) * 12 + elif face_type == FaceType.HEAD: + padding = (output_size / 64) * 24 + else: + 
raise ValueError ('wrong face_type: ', face_type) + + mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] + mat = mat * (output_size - 2 * padding) + mat[:,2] += padding + mat *= (1 / scale) + mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 ) + + return mat + +def transform_points(points, mat, invert=False): + if invert: + mat = cv2.invertAffineTransform (mat) + points = np.expand_dims(points, axis=1) + points = cv2.transform(points, mat, points.shape) + points = np.squeeze(points) + return points + + +def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None): + if len(image_landmarks) != 68: + raise Exception('get_image_hull_mask works only with 68 landmarks') + int_lmrks = np.array(image_landmarks.copy(), dtype=np.int) + + hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[0:9], + # int_lmrks[17:18]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[8:17], + # int_lmrks[26:27]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[17:20], + # int_lmrks[8:9]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[24:27], + # int_lmrks[8:9]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[19:25], + # int_lmrks[8:9], + # ))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[17:22], + # int_lmrks[27:28], + # int_lmrks[31:36], + # int_lmrks[8:9] + # ))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[22:27], + # int_lmrks[27:28], + # int_lmrks[31:36], + # int_lmrks[8:9] + # ))) , (1,) ) + + # #nose + # cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) ) + ml_pnt = (int_lmrks[36] + int_lmrks[0]) // 2 + mr_pnt = (int_lmrks[16] + int_lmrks[45]) // 2 + + # mid points between the mid points and eye + ql_pnt = (int_lmrks[36] + ml_pnt) // 2 + qr_pnt = (int_lmrks[45] + mr_pnt) // 2 + + # Top of the eye arrays + bot_l = np.array((ql_pnt, int_lmrks[36], int_lmrks[37], int_lmrks[38], int_lmrks[39])) + bot_r = np.array((int_lmrks[42], int_lmrks[43], int_lmrks[44], int_lmrks[45], qr_pnt)) + + # Eyebrow arrays + top_l = int_lmrks[17:22] + top_r = int_lmrks[22:27] + + # Adjust eyebrow arrays + int_lmrks[17:22] = top_l + ((top_l - bot_l) // 2) + int_lmrks[22:27] = top_r + ((top_r - bot_r) // 2) + + r_jaw = (int_lmrks[0:9], int_lmrks[17:18]) + l_jaw = (int_lmrks[8:17], int_lmrks[26:27]) + r_cheek = (int_lmrks[17:20], int_lmrks[8:9]) + l_cheek = (int_lmrks[24:27], int_lmrks[8:9]) + nose_ridge = (int_lmrks[19:25], int_lmrks[8:9],) + r_eye = (int_lmrks[17:22], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) + l_eye = (int_lmrks[22:27], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) + nose = (int_lmrks[27:31], int_lmrks[31:36]) + parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] + + for item in parts: + merged = np.concatenate(item) + cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), 255.) 
# pylint: disable=no-member
+
+    if ie_polys is not None:
+        ie_polys.overlay_mask(hull_mask)
+
+    return hull_mask
+
+def get_image_eye_mask (image_shape, image_landmarks):
+    if len(image_landmarks) != 68:
+        raise Exception('get_image_eye_mask works only with 68 landmarks')
+
+    hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32)
+
+    cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[36:42]), (1,) )
+    cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[42:48]), (1,) )
+
+    return hull_mask
+
+def blur_image_hull_mask (hull_mask):
+
+    maxregion = np.argwhere(hull_mask==1.0)
+    miny,minx = maxregion.min(axis=0)[:2]
+    maxy,maxx = maxregion.max(axis=0)[:2]
+    lenx = maxx - minx
+    leny = maxy - miny
+    maskx = int(minx+(lenx//2)) # center of the hull bbox (not used below)
+    masky = int(miny+(leny//2))
+    lowest_len = min (lenx, leny)
+    ero = int( lowest_len * 0.085 )
+    blur = int( lowest_len * 0.10 )
+
+    hull_mask = cv2.erode(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 )
+    hull_mask = cv2.blur(hull_mask, (blur, blur) )
+    hull_mask = np.expand_dims (hull_mask,-1)
+
+    return hull_mask
+
+mirror_idxs = [
+    [0,16],
+    [1,15],
+    [2,14],
+    [3,13],
+    [4,12],
+    [5,11],
+    [6,10],
+    [7,9],
+
+    [17,26],
+    [18,25],
+    [19,24],
+    [20,23],
+    [21,22],
+
+    [36,45],
+    [37,44],
+    [38,43],
+    [39,42],
+    [40,47],
+    [41,46],
+
+    [31,35],
+    [32,34],
+
+    [50,52],
+    [49,53],
+    [48,54],
+    [59,55],
+    [58,56],
+    [67,65],
+    [60,64],
+    [61,63] ]
+
+def mirror_landmarks (landmarks, val):
+    result = landmarks.copy()
+
+    for idx in mirror_idxs:
+        result [ idx ] = result [ idx[::-1] ]
+
+    result[:,0] = val - result[:,0] - 1
+    return result
+
+def draw_landmarks (image, image_landmarks, color=(0,255,0), transparent_mask=False, ie_polys=None):
+    if len(image_landmarks) != 68:
+        raise Exception('draw_landmarks works only with 68 landmarks')
+
+    int_lmrks = np.array(image_landmarks, dtype=np.int)
+
+    jaw = int_lmrks[slice(*landmarks_68_pt["jaw"])]
+    right_eyebrow = int_lmrks[slice(*landmarks_68_pt["right_eyebrow"])]
+    left_eyebrow = int_lmrks[slice(*landmarks_68_pt["left_eyebrow"])]
+    mouth = int_lmrks[slice(*landmarks_68_pt["mouth"])]
+    right_eye = int_lmrks[slice(*landmarks_68_pt["right_eye"])]
+    left_eye = int_lmrks[slice(*landmarks_68_pt["left_eye"])]
+    nose = int_lmrks[slice(*landmarks_68_pt["nose"])]
+
+    # open shapes
+    cv2.polylines(image, tuple(np.array([v]) for v in ( right_eyebrow, jaw, left_eyebrow, np.concatenate((nose, [nose[-6]])) )),
+                  False, color, lineType=cv2.LINE_AA)
+    # closed shapes
+    cv2.polylines(image, tuple(np.array([v]) for v in (right_eye, left_eye, mouth)),
+                  True, color, lineType=cv2.LINE_AA)
+    # the rest of the circles
+    for x, y in np.concatenate((right_eyebrow, left_eyebrow, mouth, right_eye, left_eye, nose), axis=0):
+        cv2.circle(image, (x, y), 1, color, 1, lineType=cv2.LINE_AA)
+    # jaw big circles
+    for x, y in jaw:
+        cv2.circle(image, (x, y), 2, color, lineType=cv2.LINE_AA)
+
+    if transparent_mask:
+        mask = get_image_hull_mask (image.shape, image_landmarks, ie_polys)
+        image[...] = ( image * (1-mask) + image * mask / 2 )[...]
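
The reworked get_image_hull_mask above is the heart of the "extend mask to forehead" change: it synthesizes quarter-way points between each outer jaw corner and the nearest eye corner, then raises int_lmrks[17:27] by half of the eyebrow-to-eye distance before building the per-region convex hulls. A toy sketch of that raise on invented y-coordinates (smaller y is higher in the image):

    import numpy as np

    top_l = np.array([120, 115, 112, 115, 120])   # hypothetical eyebrow y-values (int_lmrks[17:22])
    bot_l = np.array([140, 138, 137, 138, 140])   # hypothetical eye-top y-values (bot_l)

    # top + (top - bot) // 2 pushes each brow point half the brow-eye gap upward
    raised = top_l + ((top_l - bot_l) // 2)
    print(raised)   # [110 103  99 103 110] -- the hull now reaches onto the forehead

One thing worth noting about the rewritten loop: it fills each region with 255. where the old per-region fills used (1,), while blur_image_hull_mask below still keys on hull_mask==1.0, so the two only compose if the mask scale is normalized.
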
+ +def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, transparent_mask=False, ie_polys=None, landmarks_color=(0,255,0) ): + draw_landmarks(image, image_landmarks, color=landmarks_color, transparent_mask=transparent_mask, ie_polys=ie_polys) + imagelib.draw_rect (image, rect, (255,0,0), 2 ) + + image_to_face_mat = get_transform_mat (image_landmarks, face_size, face_type) + points = transform_points ( [ (0,0), (0,face_size-1), (face_size-1, face_size-1), (face_size-1,0) ], image_to_face_mat, True) + imagelib.draw_polygon (image, points, (0,0,255), 2) + +def calc_face_pitch(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0 + b = landmarks[8][1] + return float(b-t) + +def calc_face_yaw(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0 + r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 + return float(r-l) + +#returns pitch,yaw,roll [-1...+1] +def estimate_pitch_yaw_roll(aligned_256px_landmarks): + shape = (256,256) + focal_length = shape[1] + camera_center = (shape[1] / 2, shape[0] / 2) + camera_matrix = np.array( + [[focal_length, 0, camera_center[0]], + [0, focal_length, camera_center[1]], + [0, 0, 1]], dtype=np.float32) + + (_, rotation_vector, translation_vector) = cv2.solvePnP( + landmarks_68_3D, + aligned_256px_landmarks.astype(np.float32), + camera_matrix, + np.zeros((4, 1)) ) + + pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) + pitch = np.clip ( pitch/1.30, -1.0, 1.0 ) + yaw = np.clip ( yaw / 1.11, -1.0, 1.0 ) + roll = np.clip ( roll/3.15, -1.0, 1.0 ) + return -pitch, yaw, roll diff --git a/facelib/MTCExtractor.py b/facelib/MTCExtractor.py index c524ab9..056e8ad 100644 --- a/facelib/MTCExtractor.py +++ b/facelib/MTCExtractor.py @@ -1,350 +1,350 @@ -import numpy as np -import os -import cv2 - -from pathlib import Path -from nnlib import nnlib - -class MTCExtractor(object): - def __init__(self): - self.scale_to = 1920 - - self.min_face_size = self.scale_to * 0.042 - self.thresh1 = 0.7 - self.thresh2 = 0.85 - self.thresh3 = 0.6 - self.scale_factor = 0.95 - - exec( nnlib.import_all(), locals(), globals() ) - PNet_Input = Input ( (None, None,3) ) - x = PNet_Input - x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="PReLU1" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="PReLU2" )(x) - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="PReLU3" )(x) - prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x) - prob = Softmax()(prob) - x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x) - - PNet_model = Model(PNet_Input, [x,prob] ) - PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() ) - - RNet_Input = Input ( (24, 24, 3) ) - x = RNet_Input - x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = 
MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (128, name='conv4')(x) - x = PReLU (name="prelu4" )(x) - prob = Dense (2, name='conv51')(x) - prob = Softmax()(prob) - x = Dense (4, name='conv52')(x) - RNet_model = Model(RNet_Input, [x,prob] ) - RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() ) - - ONet_Input = Input ( (48, 48, 3) ) - x = ONet_Input - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x) - x = PReLU (shared_axes=[1,2], name="prelu4" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (256, name='conv5')(x) - x = PReLU (name="prelu5" )(x) - prob = Dense (2, name='conv61')(x) - prob = Softmax()(prob) - x1 = Dense (4, name='conv62')(x) - x2 = Dense (10, name='conv63')(x) - ONet_model = Model(ONet_Input, [x1,x2,prob] ) - ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() ) - - self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs ) - self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs ) - self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs ) - - def __enter__(self): - faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, is_bgr=True): - - if is_bgr: - input_image = input_image[:,:,::-1].copy() - is_bgr = False - - (h, w, ch) = input_image.shape - - input_scale = self.scale_to / max(w,h) - input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) - - detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ] - - return detected_faces - -def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): - """Detects faces in an image, and returns bounding boxes and 
points for them. - img: input image - minsize: minimum faces' size - pnet, rnet, onet: caffemodel - threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold - factor: the factor used to create a scaling pyramid of face sizes to detect in the image. - """ - factor_count=0 - total_boxes=np.empty((0,9)) - points=np.empty(0) - h=img.shape[0] - w=img.shape[1] - minl=np.amin([h, w]) - m=12.0/minsize - minl=minl*m - # create scale pyramid - scales=[] - while minl>=12: - scales += [m*np.power(factor, factor_count)] - minl = minl*factor - factor_count += 1 - # first stage - for scale in scales: - hs=int(np.ceil(h*scale)) - ws=int(np.ceil(w*scale)) - #print ('scale %f %d %d' % (scale, ws,hs)) - im_data = imresample(img, (hs, ws)) - im_data = (im_data-127.5)*0.0078125 - img_x = np.expand_dims(im_data, 0) - img_y = np.transpose(img_x, (0,2,1,3)) - out = pnet([img_y]) - out0 = np.transpose(out[0], (0,2,1,3)) - out1 = np.transpose(out[1], (0,2,1,3)) - - boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) - - # inter-scale nms - pick = nms(boxes.copy(), 0.5, 'Union') - if boxes.size>0 and pick.size>0: - boxes = boxes[pick,:] - total_boxes = np.append(total_boxes, boxes, axis=0) - - numbox = total_boxes.shape[0] - if numbox>0: - pick = nms(total_boxes.copy(), 0.7, 'Union') - total_boxes = total_boxes[pick,:] - regw = total_boxes[:,2]-total_boxes[:,0] - regh = total_boxes[:,3]-total_boxes[:,1] - qq1 = total_boxes[:,0]+total_boxes[:,5]*regw - qq2 = total_boxes[:,1]+total_boxes[:,6]*regh - qq3 = total_boxes[:,2]+total_boxes[:,7]*regw - qq4 = total_boxes[:,3]+total_boxes[:,8]*regh - total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) - total_boxes = rerec(total_boxes.copy()) - total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - - numbox = total_boxes.shape[0] - if numbox>0: - # second stage - tempimg = np.zeros((24,24,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (24, 24)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = rnet([tempimg1]) - out0 = np.transpose(out[0]) - out1 = np.transpose(out[1]) - score = out1[1,:] - ipass = np.where(score>threshold[1]) - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - if total_boxes.shape[0]>0: - pick = nms(total_boxes, 0.7, 'Union') - total_boxes = total_boxes[pick,:] - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) - total_boxes = rerec(total_boxes.copy()) - - numbox = total_boxes.shape[0] - if numbox>0: - # third stage - total_boxes = np.fix(total_boxes).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - tempimg = np.zeros((48,48,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (48, 48)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = onet([tempimg1]) - out0 = 
np.transpose(out[0]) - out1 = np.transpose(out[1]) - out2 = np.transpose(out[2]) - score = out2[1,:] - points = out1 - ipass = np.where(score>threshold[2]) - points = points[:,ipass[0]] - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - - w = total_boxes[:,2]-total_boxes[:,0]+1 - h = total_boxes[:,3]-total_boxes[:,1]+1 - points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 - points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 - if total_boxes.shape[0]>0: - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) - pick = nms(total_boxes.copy(), 0.7, 'Min') - total_boxes = total_boxes[pick,:] - points = points[:,pick] - - return total_boxes, points - - -# function [boundingbox] = bbreg(boundingbox,reg) -def bbreg(boundingbox,reg): - """Calibrate bounding boxes""" - if reg.shape[1]==1: - reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) - - w = boundingbox[:,2]-boundingbox[:,0]+1 - h = boundingbox[:,3]-boundingbox[:,1]+1 - b1 = boundingbox[:,0]+reg[:,0]*w - b2 = boundingbox[:,1]+reg[:,1]*h - b3 = boundingbox[:,2]+reg[:,2]*w - b4 = boundingbox[:,3]+reg[:,3]*h - boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) - return boundingbox - -def generateBoundingBox(imap, reg, scale, t): - """Use heatmap to generate bounding boxes""" - stride=2 - cellsize=12 - - imap = np.transpose(imap) - dx1 = np.transpose(reg[:,:,0]) - dy1 = np.transpose(reg[:,:,1]) - dx2 = np.transpose(reg[:,:,2]) - dy2 = np.transpose(reg[:,:,3]) - y, x = np.where(imap >= t) - if y.shape[0]==1: - dx1 = np.flipud(dx1) - dy1 = np.flipud(dy1) - dx2 = np.flipud(dx2) - dy2 = np.flipud(dy2) - score = imap[(y,x)] - reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) - if reg.size==0: - reg = np.empty((0,3)) - bb = np.transpose(np.vstack([y,x])) - q1 = np.fix((stride*bb+1)/scale) - q2 = np.fix((stride*bb+cellsize-1+1)/scale) - boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) - return boundingbox, reg - -# function pick = nms(boxes,threshold,type) -def nms(boxes, threshold, method): - if boxes.size==0: - return np.empty((0,3)) - x1 = boxes[:,0] - y1 = boxes[:,1] - x2 = boxes[:,2] - y2 = boxes[:,3] - s = boxes[:,4] - area = (x2-x1+1) * (y2-y1+1) - I = np.argsort(s) - pick = np.zeros_like(s, dtype=np.int16) - counter = 0 - while I.size>0: - i = I[-1] - pick[counter] = i - counter += 1 - idx = I[0:-1] - xx1 = np.maximum(x1[i], x1[idx]) - yy1 = np.maximum(y1[i], y1[idx]) - xx2 = np.minimum(x2[i], x2[idx]) - yy2 = np.minimum(y2[i], y2[idx]) - w = np.maximum(0.0, xx2-xx1+1) - h = np.maximum(0.0, yy2-yy1+1) - inter = w * h - if method == 'Min': - o = inter / np.minimum(area[i], area[idx]) - else: - o = inter / (area[i] + area[idx] - inter) - I = I[np.where(o<=threshold)] - pick = pick[0:counter] - return pick - -# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) -def pad(total_boxes, w, h): - """Compute the padding coordinates (pad the bounding boxes to square)""" - tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) - tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) - numbox = total_boxes.shape[0] - - dx = np.ones((numbox), dtype=np.int32) - dy = np.ones((numbox), dtype=np.int32) - edx = tmpw.copy().astype(np.int32) - edy = tmph.copy().astype(np.int32) - - x = total_boxes[:,0].copy().astype(np.int32) - y = total_boxes[:,1].copy().astype(np.int32) - ex = total_boxes[:,2].copy().astype(np.int32) - ey = 
total_boxes[:,3].copy().astype(np.int32) - - tmp = np.where(ex>w) - edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) - ex[tmp] = w - - tmp = np.where(ey>h) - edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) - ey[tmp] = h - - tmp = np.where(x<1) - dx.flat[tmp] = np.expand_dims(2-x[tmp],1) - x[tmp] = 1 - - tmp = np.where(y<1) - dy.flat[tmp] = np.expand_dims(2-y[tmp],1) - y[tmp] = 1 - - return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph - -# function [bboxA] = rerec(bboxA) -def rerec(bboxA): - """Convert bboxA to square.""" - h = bboxA[:,3]-bboxA[:,1] - w = bboxA[:,2]-bboxA[:,0] - l = np.maximum(w, h) - bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 - bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 - bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) - return bboxA - -def imresample(img, sz): - im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable - return im_data +import numpy as np +import os +import cv2 + +from pathlib import Path +from nnlib import nnlib + +class MTCExtractor(object): + def __init__(self): + self.scale_to = 1920 + + self.min_face_size = self.scale_to * 0.042 + self.thresh1 = 0.7 + self.thresh2 = 0.85 + self.thresh3 = 0.6 + self.scale_factor = 0.95 + + exec( nnlib.import_all(), locals(), globals() ) + PNet_Input = Input ( (None, None,3) ) + x = PNet_Input + x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) + x = PReLU (shared_axes=[1,2], name="PReLU1" )(x) + x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) + x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) + x = PReLU (shared_axes=[1,2], name="PReLU2" )(x) + x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) + x = PReLU (shared_axes=[1,2], name="PReLU3" )(x) + prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x) + prob = Softmax()(prob) + x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x) + + PNet_model = Model(PNet_Input, [x,prob] ) + PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() ) + + RNet_Input = Input ( (24, 24, 3) ) + x = RNet_Input + x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) + x = PReLU (shared_axes=[1,2], name="prelu1" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) + x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) + x = PReLU (shared_axes=[1,2], name="prelu2" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) + x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x) + x = PReLU (shared_axes=[1,2], name="prelu3" )(x) + x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) + x = Dense (128, name='conv4')(x) + x = PReLU (name="prelu4" )(x) + prob = Dense (2, name='conv51')(x) + prob = Softmax()(prob) + x = Dense (4, name='conv52')(x) + RNet_model = Model(RNet_Input, [x,prob] ) + RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() ) + + ONet_Input = Input ( (48, 48, 3) ) + x = ONet_Input + x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) + x = PReLU (shared_axes=[1,2], name="prelu1" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) + x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) + x = PReLU 
(shared_axes=[1,2], name="prelu2" )(x)
+        x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x)
+        x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x)
+        x = PReLU (shared_axes=[1,2], name="prelu3" )(x)
+        x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x)
+        x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x)
+        x = PReLU (shared_axes=[1,2], name="prelu4" )(x)
+        x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x)
+        x = Dense (256, name='conv5')(x)
+        x = PReLU (name="prelu5" )(x)
+        prob = Dense (2, name='conv61')(x)
+        prob = Softmax()(prob)
+        x1 = Dense (4, name='conv62')(x)
+        x2 = Dense (10, name='conv63')(x)
+        ONet_model = Model(ONet_Input, [x1,x2,prob] )
+        ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() )
+
+        self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs )
+        self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs )
+        self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs )
+
+    def __enter__(self):
+        faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor )
+
+        return self
+
+    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
+        return False #pass exception between __enter__ and __exit__ to outer level
+
+    def extract (self, input_image, is_bgr=True):
+
+        if is_bgr:
+            input_image = input_image[:,:,::-1].copy()
+            is_bgr = False
+
+        (h, w, ch) = input_image.shape
+
+        input_scale = self.scale_to / max(w,h)
+        input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR)
+
+        detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor )
+        detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ]
+
+        return detected_faces
+
+def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
+    """Detects faces in an image, and returns bounding boxes and points for them.
+    img: input image
+    minsize: minimum face size to detect, in pixels
+    pnet, rnet, onet: inference functions for the P-Net, R-Net and O-Net stages
+    threshold: threshold=[th1, th2, th3], the acceptance thresholds of the three stages
+    factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
+ """ + factor_count=0 + total_boxes=np.empty((0,9)) + points=np.empty(0) + h=img.shape[0] + w=img.shape[1] + minl=np.amin([h, w]) + m=12.0/minsize + minl=minl*m + # create scale pyramid + scales=[] + while minl>=12: + scales += [m*np.power(factor, factor_count)] + minl = minl*factor + factor_count += 1 + # first stage + for scale in scales: + hs=int(np.ceil(h*scale)) + ws=int(np.ceil(w*scale)) + #print ('scale %f %d %d' % (scale, ws,hs)) + im_data = imresample(img, (hs, ws)) + im_data = (im_data-127.5)*0.0078125 + img_x = np.expand_dims(im_data, 0) + img_y = np.transpose(img_x, (0,2,1,3)) + out = pnet([img_y]) + out0 = np.transpose(out[0], (0,2,1,3)) + out1 = np.transpose(out[1], (0,2,1,3)) + + boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) + + # inter-scale nms + pick = nms(boxes.copy(), 0.5, 'Union') + if boxes.size>0 and pick.size>0: + boxes = boxes[pick,:] + total_boxes = np.append(total_boxes, boxes, axis=0) + + numbox = total_boxes.shape[0] + if numbox>0: + pick = nms(total_boxes.copy(), 0.7, 'Union') + total_boxes = total_boxes[pick,:] + regw = total_boxes[:,2]-total_boxes[:,0] + regh = total_boxes[:,3]-total_boxes[:,1] + qq1 = total_boxes[:,0]+total_boxes[:,5]*regw + qq2 = total_boxes[:,1]+total_boxes[:,6]*regh + qq3 = total_boxes[:,2]+total_boxes[:,7]*regw + qq4 = total_boxes[:,3]+total_boxes[:,8]*regh + total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) + total_boxes = rerec(total_boxes.copy()) + total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + + numbox = total_boxes.shape[0] + if numbox>0: + # second stage + tempimg = np.zeros((24,24,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (24, 24)) + else: + return np.empty() + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = rnet([tempimg1]) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + score = out1[1,:] + ipass = np.where(score>threshold[1]) + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + if total_boxes.shape[0]>0: + pick = nms(total_boxes, 0.7, 'Union') + total_boxes = total_boxes[pick,:] + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) + total_boxes = rerec(total_boxes.copy()) + + numbox = total_boxes.shape[0] + if numbox>0: + # third stage + total_boxes = np.fix(total_boxes).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + tempimg = np.zeros((48,48,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (48, 48)) + else: + return np.empty() + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = onet([tempimg1]) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + score = out2[1,:] + points = out1 + ipass = np.where(score>threshold[2]) + points = points[:,ipass[0]] + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), 
np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + + w = total_boxes[:,2]-total_boxes[:,0]+1 + h = total_boxes[:,3]-total_boxes[:,1]+1 + points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 + points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 + if total_boxes.shape[0]>0: + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) + pick = nms(total_boxes.copy(), 0.7, 'Min') + total_boxes = total_boxes[pick,:] + points = points[:,pick] + + return total_boxes, points + + +# function [boundingbox] = bbreg(boundingbox,reg) +def bbreg(boundingbox,reg): + """Calibrate bounding boxes""" + if reg.shape[1]==1: + reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) + + w = boundingbox[:,2]-boundingbox[:,0]+1 + h = boundingbox[:,3]-boundingbox[:,1]+1 + b1 = boundingbox[:,0]+reg[:,0]*w + b2 = boundingbox[:,1]+reg[:,1]*h + b3 = boundingbox[:,2]+reg[:,2]*w + b4 = boundingbox[:,3]+reg[:,3]*h + boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) + return boundingbox + +def generateBoundingBox(imap, reg, scale, t): + """Use heatmap to generate bounding boxes""" + stride=2 + cellsize=12 + + imap = np.transpose(imap) + dx1 = np.transpose(reg[:,:,0]) + dy1 = np.transpose(reg[:,:,1]) + dx2 = np.transpose(reg[:,:,2]) + dy2 = np.transpose(reg[:,:,3]) + y, x = np.where(imap >= t) + if y.shape[0]==1: + dx1 = np.flipud(dx1) + dy1 = np.flipud(dy1) + dx2 = np.flipud(dx2) + dy2 = np.flipud(dy2) + score = imap[(y,x)] + reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) + if reg.size==0: + reg = np.empty((0,3)) + bb = np.transpose(np.vstack([y,x])) + q1 = np.fix((stride*bb+1)/scale) + q2 = np.fix((stride*bb+cellsize-1+1)/scale) + boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) + return boundingbox, reg + +# function pick = nms(boxes,threshold,type) +def nms(boxes, threshold, method): + if boxes.size==0: + return np.empty((0,3)) + x1 = boxes[:,0] + y1 = boxes[:,1] + x2 = boxes[:,2] + y2 = boxes[:,3] + s = boxes[:,4] + area = (x2-x1+1) * (y2-y1+1) + I = np.argsort(s) + pick = np.zeros_like(s, dtype=np.int16) + counter = 0 + while I.size>0: + i = I[-1] + pick[counter] = i + counter += 1 + idx = I[0:-1] + xx1 = np.maximum(x1[i], x1[idx]) + yy1 = np.maximum(y1[i], y1[idx]) + xx2 = np.minimum(x2[i], x2[idx]) + yy2 = np.minimum(y2[i], y2[idx]) + w = np.maximum(0.0, xx2-xx1+1) + h = np.maximum(0.0, yy2-yy1+1) + inter = w * h + if method == 'Min': + o = inter / np.minimum(area[i], area[idx]) + else: + o = inter / (area[i] + area[idx] - inter) + I = I[np.where(o<=threshold)] + pick = pick[0:counter] + return pick + +# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) +def pad(total_boxes, w, h): + """Compute the padding coordinates (pad the bounding boxes to square)""" + tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) + tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) + numbox = total_boxes.shape[0] + + dx = np.ones((numbox), dtype=np.int32) + dy = np.ones((numbox), dtype=np.int32) + edx = tmpw.copy().astype(np.int32) + edy = tmph.copy().astype(np.int32) + + x = total_boxes[:,0].copy().astype(np.int32) + y = total_boxes[:,1].copy().astype(np.int32) + ex = total_boxes[:,2].copy().astype(np.int32) + ey = total_boxes[:,3].copy().astype(np.int32) + + tmp = np.where(ex>w) + edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) + ex[tmp] = w + + tmp = np.where(ey>h) + edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) + ey[tmp] = h + + tmp = 
np.where(x<1) + dx.flat[tmp] = np.expand_dims(2-x[tmp],1) + x[tmp] = 1 + + tmp = np.where(y<1) + dy.flat[tmp] = np.expand_dims(2-y[tmp],1) + y[tmp] = 1 + + return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph + +# function [bboxA] = rerec(bboxA) +def rerec(bboxA): + """Convert bboxA to square.""" + h = bboxA[:,3]-bboxA[:,1] + w = bboxA[:,2]-bboxA[:,0] + l = np.maximum(w, h) + bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 + bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 + bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) + return bboxA + +def imresample(img, sz): + im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable + return im_data diff --git a/facelib/PoseEstimator.py b/facelib/PoseEstimator.py index b0011ad..586f3ce 100644 --- a/facelib/PoseEstimator.py +++ b/facelib/PoseEstimator.py @@ -1,313 +1,313 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. -trained on https://www.umdfaces.io -based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION -""" - -class PoseEstimator(object): - VERSION = 1 - def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - self.resolution = resolution - - self.angles = [60, 45, 30, 10, 2] - self.alpha_cat_losses = [7,5,3,1,1] - self.class_nums = [ angle+1 for angle in self.angles ] - self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) ) - self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) ) - self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) ) - - self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) ) - - self.input_bgr_shape = (resolution, resolution, 3) - - def ResamplerFunc(input): - mean_t, logvar_t = input - return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t)) - - self.BVAEResampler = Lambda ( lambda x: x[0] + K.exp(0.5*x[1])*K.random_normal(K.shape(x[0])), - output_shape=K.int_shape(self.encoder.outputs[0])[1:] ) - - inp_t = Input (self.input_bgr_shape) - inp_real_t = Input (self.input_bgr_shape) - inp_pitch_t = Input ( (1,) ) - inp_yaw_t = Input ( (1,) ) - inp_roll_t = Input ( (1,) ) - - - mean_t, logvar_t = self.encoder(inp_t) - - latent_t = self.BVAEResampler([mean_t, logvar_t]) - - if training: - bgr_t = self.decoder (latent_t) - pyrs_t = self.model_l(latent_t) - else: - self.model = Model(inp_t, self.model_l(latent_t) ) - pyrs_t = self.model(inp_t) - - if load_weights: - if training: - self.encoder.load_weights (str(self.encoder_weights_path)) - self.decoder.load_weights (str(self.decoder_weights_path)) - self.model_l.load_weights (str(self.l_weights_path)) - else: - self.model.load_weights (str(self.model_weights_path)) - - else: - def gather_Conv2D_layers(models_list): - conv_weights_list = [] - for model in models_list: - for layer in model.layers: - layer_type = type(layer) - if layer_type == keras.layers.Conv2D: - 
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - elif layer_type == keras.engine.training.Model: - conv_weights_list += gather_Conv2D_layers ([layer]) - return conv_weights_list - - CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) ) - - - if training: - inp_pyrs_t = [] - for class_num in self.class_nums: - inp_pyrs_t += [ Input ((3,)) ] - - pyr_loss = [] - - for i,class_num in enumerate(self.class_nums): - a = self.alpha_cat_losses[i] - pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ] - - def BVAELoss(beta=4): - #keep in mind loss per sample, not per minibatch - def func(input): - mean_t, logvar_t = input - return beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) - return func - - BVAE_loss = BVAELoss(4)([mean_t, logvar_t])#beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) - - - bgr_loss = K.mean(K.square(inp_real_t-bgr_t), axis=0, keepdims=True) - - #train_loss = BVAE_loss + bgr_loss - - pyr_loss = sum(pyr_loss) - - - self.train = K.function ([inp_t, inp_real_t], - [ K.mean (BVAE_loss)+K.mean(bgr_loss) ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( [BVAE_loss, bgr_loss], self.encoder.trainable_weights+self.decoder.trainable_weights ) ) - - self.train_l = K.function ([inp_t] + inp_pyrs_t, - [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) ) - - - self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.encoder.save_weights (str(self.encoder_weights_path)) - self.decoder.save_weights (str(self.decoder_weights_path)) - self.model_l.save_weights (str(self.l_weights_path)) - - inp_t = Input (self.input_bgr_shape) - - Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) - - def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False): - - if not skip_bgr_train: - bgr_loss, = self.train( [warps, imgs] ) - pyr_loss = 0 - else: - bgr_loss = 0 - - feed = [imgs] - for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): - a = angle / 2 - c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx()) - feed += [c] - - pyr_loss, = self.train_l(feed) - - return bgr_loss, pyr_loss - - def extract (self, input_image, is_input_tanh=False): - if is_input_tanh: - raise NotImplemented("is_input_tanh") - - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
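
The BVAELoss closure in this constructor (identical in the removed and re-added code) is the standard beta-weighted KL divergence between the encoder's diagonal Gaussian N(mean, exp(logvar)) and a unit Gaussian, summed over latent dimensions and averaged over the batch. A numpy restatement of the same formula on toy statistics (batch of 2, latent size 2, beta=4 as in the code):

    import numpy as np

    beta   = 4
    mean   = np.array([[0.5, -0.2], [0.1, 0.3]])    # invented encoder outputs
    logvar = np.array([[0.0,  0.1], [-0.2, 0.0]])

    # per-sample KL( N(mean, exp(logvar)) || N(0, I) ), summed over latent dims
    kl = np.sum(-0.5 * (1 + logvar - np.exp(logvar) - np.square(mean)), axis=1)
    loss = beta * np.mean(kl)     # matches beta * K.mean(K.sum(..., axis=1), axis=0)
    print(loss)
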
- - bgr, result, = self.view( [input_image] ) - - - #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 ) - - if input_shape_len == 3: - bgr = bgr[0] - result = result[0] - - return bgr, result - - @staticmethod - def BuildModels ( resolution, class_nums, ae_dims=128): - exec( nnlib.import_all(), locals(), globals() ) - - x = inp = Input ( (resolution,resolution,3) ) - x = PoseEstimator.EncFlow(ae_dims)(x) - encoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.DecFlow(resolution, ae_dims)(x) - decoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.LatentFlow(class_nums=class_nums)(x) - model_l = Model(inp, x ) - - return encoder, decoder, model_l - - @staticmethod - def EncFlow(ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - XConv2D = partial(Conv2D, padding='zero') - - - def downscale (dim, **kwargs): - def func(x): - return ReLU() ( ( XConv2D(dim, kernel_size=4, strides=2)(x)) ) - return func - - - downscale = partial(downscale) - - ed_ch_dims = 128 - - def func(input): - x = input - x = downscale(64)(x) - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = Flatten()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - mean = Dense(ae_dims)(x) - logvar = Dense(ae_dims)(x) - - return mean, logvar - - return func - - @staticmethod - def DecFlow(resolution, ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - XConv2D = partial(Conv2D, padding='zero') - - def upscale (dim, strides=2, **kwargs): - def func(x): - return ReLU()( ( Conv2DTranspose(dim, kernel_size=4, strides=strides, padding='same')(x)) ) - return func - - def to_bgr (output_nc, **kwargs): - def func(x): - return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x) - return func - - upscale = partial(upscale) - lowest_dense_res = resolution // 16 - - def func(input): - x = input - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x) - x = ReLU()(x) - - x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x) - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - x = to_bgr(3)(x) - - return x - return func - - @staticmethod - def LatentFlow(class_nums): - exec( nnlib.import_all(), locals(), globals() ) - - XConv2D = partial(Conv2D, padding='zero') - - def func(latent): - x = latent - - x = Dense(1024, activation='relu')(x) - x = Dropout(0.5)(x) - x = Dense(1024, activation='relu')(x) - # x = Dropout(0.5)(x) - # x = Dense(4096, activation='relu')(x) - - output = [] - for class_num in class_nums: - pyr = Dense(3, activation='tanh')(x) - output += [pyr] - - return output - - #y = Dropout(0.5)(y) - #y = Dense(1024, activation='relu')(y) - return func - - -# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') -# x = resnet50(x) -# output = [] -# for class_num in class_nums: -# pitch = Dense(class_num)(x) -# yaw = Dense(class_num)(x) -# roll = Dense(class_num)(x) -# output += [pitch,yaw,roll] - -# return output +import os +import pickle +from functools import partial +from pathlib import Path + +import cv2 +import numpy as np + +from interact import interact as io +from nnlib import nnlib + +""" +PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. 
+trained on https://www.umdfaces.io +based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION +""" + +class PoseEstimator(object): + VERSION = 1 + def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): + exec( nnlib.import_all(), locals(), globals() ) + self.resolution = resolution + + self.angles = [60, 45, 30, 10, 2] + self.alpha_cat_losses = [7,5,3,1,1] + self.class_nums = [ angle+1 for angle in self.angles ] + self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums) + + if weights_file_root is not None: + weights_file_root = Path(weights_file_root) + else: + weights_file_root = Path(__file__).parent + + self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) ) + self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) ) + self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) ) + + self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) ) + + self.input_bgr_shape = (resolution, resolution, 3) + + def ResamplerFunc(input): + mean_t, logvar_t = input + return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t)) + + self.BVAEResampler = Lambda ( lambda x: x[0] + K.exp(0.5*x[1])*K.random_normal(K.shape(x[0])), + output_shape=K.int_shape(self.encoder.outputs[0])[1:] ) + + inp_t = Input (self.input_bgr_shape) + inp_real_t = Input (self.input_bgr_shape) + inp_pitch_t = Input ( (1,) ) + inp_yaw_t = Input ( (1,) ) + inp_roll_t = Input ( (1,) ) + + + mean_t, logvar_t = self.encoder(inp_t) + + latent_t = self.BVAEResampler([mean_t, logvar_t]) + + if training: + bgr_t = self.decoder (latent_t) + pyrs_t = self.model_l(latent_t) + else: + self.model = Model(inp_t, self.model_l(latent_t) ) + pyrs_t = self.model(inp_t) + + if load_weights: + if training: + self.encoder.load_weights (str(self.encoder_weights_path)) + self.decoder.load_weights (str(self.decoder_weights_path)) + self.model_l.load_weights (str(self.l_weights_path)) + else: + self.model.load_weights (str(self.model_weights_path)) + + else: + def gather_Conv2D_layers(models_list): + conv_weights_list = [] + for model in models_list: + for layer in model.layers: + layer_type = type(layer) + if layer_type == keras.layers.Conv2D: + conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights + elif layer_type == keras.engine.training.Model: + conv_weights_list += gather_Conv2D_layers ([layer]) + return conv_weights_list + + CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) ) + + + if training: + inp_pyrs_t = [] + for class_num in self.class_nums: + inp_pyrs_t += [ Input ((3,)) ] + + pyr_loss = [] + + for i,class_num in enumerate(self.class_nums): + a = self.alpha_cat_losses[i] + pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ] + + def BVAELoss(beta=4): + #keep in mind loss per sample, not per minibatch + def func(input): + mean_t, logvar_t = input + return beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) + return func + + BVAE_loss = BVAELoss(4)([mean_t, logvar_t])#beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) + + + bgr_loss = K.mean(K.square(inp_real_t-bgr_t), axis=0, keepdims=True) + + #train_loss = BVAE_loss + bgr_loss + + pyr_loss = 
sum(pyr_loss) + + + self.train = K.function ([inp_t, inp_real_t], + [ K.mean (BVAE_loss)+K.mean(bgr_loss) ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( [BVAE_loss, bgr_loss], self.encoder.trainable_weights+self.decoder.trainable_weights ) ) + + self.train_l = K.function ([inp_t] + inp_pyrs_t, + [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) ) + + + self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] ) + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def save_weights(self): + self.encoder.save_weights (str(self.encoder_weights_path)) + self.decoder.save_weights (str(self.decoder_weights_path)) + self.model_l.save_weights (str(self.l_weights_path)) + + inp_t = Input (self.input_bgr_shape) + + Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) + + def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False): + + if not skip_bgr_train: + bgr_loss, = self.train( [warps, imgs] ) + pyr_loss = 0 + else: + bgr_loss = 0 + + feed = [imgs] + for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): + a = angle / 2 + c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx()) + feed += [c] + + pyr_loss, = self.train_l(feed) + + return bgr_loss, pyr_loss + + def extract (self, input_image, is_input_tanh=False): + if is_input_tanh: + raise NotImplemented("is_input_tanh") + + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] + + bgr, result, = self.view( [input_image] ) + + + #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 ) + + if input_shape_len == 3: + bgr = bgr[0] + result = result[0] + + return bgr, result + + @staticmethod + def BuildModels ( resolution, class_nums, ae_dims=128): + exec( nnlib.import_all(), locals(), globals() ) + + x = inp = Input ( (resolution,resolution,3) ) + x = PoseEstimator.EncFlow(ae_dims)(x) + encoder = Model(inp,x) + + x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) + x = PoseEstimator.DecFlow(resolution, ae_dims)(x) + decoder = Model(inp,x) + + x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) + x = PoseEstimator.LatentFlow(class_nums=class_nums)(x) + model_l = Model(inp, x ) + + return encoder, decoder, model_l + + @staticmethod + def EncFlow(ae_dims): + exec( nnlib.import_all(), locals(), globals() ) + + XConv2D = partial(Conv2D, padding='zero') + + + def downscale (dim, **kwargs): + def func(x): + return ReLU() ( ( XConv2D(dim, kernel_size=4, strides=2)(x)) ) + return func + + + downscale = partial(downscale) + + ed_ch_dims = 128 + + def func(input): + x = input + x = downscale(64)(x) + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = Flatten()(x) + + x = Dense(256)(x) + x = ReLU()(x) + + x = Dense(256)(x) + x = ReLU()(x) + + mean = Dense(ae_dims)(x) + logvar = Dense(ae_dims)(x) + + return mean, logvar + + return func + + @staticmethod + def DecFlow(resolution, ae_dims): + exec( nnlib.import_all(), locals(), globals() ) + + XConv2D = partial(Conv2D, padding='zero') + + def upscale (dim, strides=2, **kwargs): + def func(x): + return ReLU()( ( Conv2DTranspose(dim, kernel_size=4, strides=strides, padding='same')(x)) ) + return func + + def to_bgr (output_nc, **kwargs): + def func(x): + return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x) + return func + + upscale = 
partial(upscale) + lowest_dense_res = resolution // 16 + + def func(input): + x = input + + x = Dense(256)(x) + x = ReLU()(x) + + x = Dense(256)(x) + x = ReLU()(x) + + x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x) + x = ReLU()(x) + + x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x) + + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + x = upscale(64)(x) + x = to_bgr(3)(x) + + return x + return func + + @staticmethod + def LatentFlow(class_nums): + exec( nnlib.import_all(), locals(), globals() ) + + XConv2D = partial(Conv2D, padding='zero') + + def func(latent): + x = latent + + x = Dense(1024, activation='relu')(x) + x = Dropout(0.5)(x) + x = Dense(1024, activation='relu')(x) + # x = Dropout(0.5)(x) + # x = Dense(4096, activation='relu')(x) + + output = [] + for class_num in class_nums: + pyr = Dense(3, activation='tanh')(x) + output += [pyr] + + return output + + #y = Dropout(0.5)(y) + #y = Dense(1024, activation='relu')(y) + return func + + +# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') +# x = resnet50(x) +# output = [] +# for class_num in class_nums: +# pitch = Dense(class_num)(x) +# yaw = Dense(class_num)(x) +# roll = Dense(class_num)(x) +# output += [pitch,yaw,roll] + +# return output diff --git a/facelib/S3FDExtractor.py b/facelib/S3FDExtractor.py index 59e42f1..3487ac6 100644 --- a/facelib/S3FDExtractor.py +++ b/facelib/S3FDExtractor.py @@ -1,98 +1,98 @@ -import numpy as np -from pathlib import Path -import cv2 -from nnlib import nnlib - -class S3FDExtractor(object): - def __init__(self): - exec( nnlib.import_all(), locals(), globals() ) - - model_path = Path(__file__).parent / "S3FD.h5" - if not model_path.exists(): - return None - - self.model = nnlib.keras.models.load_model ( str(model_path) ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, is_bgr=True): - - if is_bgr: - input_image = input_image[:,:,::-1] - is_bgr = False - - (h, w, ch) = input_image.shape - - d = max(w, h) - scale_to = 640 if d >= 1280 else d / 2 - scale_to = max(64, scale_to) - - input_scale = d / scale_to - input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR) - - olist = self.model.predict( np.expand_dims(input_image,0) ) - - detected_faces = [] - for ltrb in self.refine (olist): - l,t,r,b = [ x*input_scale for x in ltrb] - bt = b-t - if min(r-l,bt) < 40: #filtering faces < 40pix by any side - continue - b += bt*0.1 #enlarging bottom line a bit for 2DFAN-4, because default is not enough covering a chin - detected_faces.append ( [int(x) for x in (l,t,r,b) ] ) - - return detected_faces - - def refine(self, olist): - bboxlist = [] - for i, ((ocls,), (oreg,)) in enumerate ( zip ( olist[::2], olist[1::2] ) ): - stride = 2**(i + 2) # 4,8,16,32,64,128 - s_d2 = stride / 2 - s_m4 = stride * 4 - - for hindex, windex in zip(*np.where(ocls > 0.05)): - score = ocls[hindex, windex] - loc = oreg[hindex, windex, :] - priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4]) - priors_2p = priors[2:] - box = np.concatenate((priors[:2] + loc[:2] * 0.1 * priors_2p, - priors_2p * np.exp(loc[2:] * 0.2)) ) - box[:2] -= box[2:] / 2 - box[2:] += box[:2] - - bboxlist.append([*box, score]) - - bboxlist = np.array(bboxlist) - if len(bboxlist) == 0: - bboxlist = 
np.zeros((1, 5)) - - bboxlist = bboxlist[self.refine_nms(bboxlist, 0.3), :] - bboxlist = [ x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5] - return bboxlist - - def refine_nms(self, dets, thresh): - keep = list() - if len(dets) == 0: - return keep - - x_1, y_1, x_2, y_2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] - areas = (x_2 - x_1 + 1) * (y_2 - y_1 + 1) - order = scores.argsort()[::-1] - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - xx_1, yy_1 = np.maximum(x_1[i], x_1[order[1:]]), np.maximum(y_1[i], y_1[order[1:]]) - xx_2, yy_2 = np.minimum(x_2[i], x_2[order[1:]]), np.minimum(y_2[i], y_2[order[1:]]) - - width, height = np.maximum(0.0, xx_2 - xx_1 + 1), np.maximum(0.0, yy_2 - yy_1 + 1) - ovr = width * height / (areas[i] + areas[order[1:]] - width * height) - - inds = np.where(ovr <= thresh)[0] - order = order[inds + 1] - return keep +import numpy as np +from pathlib import Path +import cv2 +from nnlib import nnlib + +class S3FDExtractor(object): + def __init__(self): + exec( nnlib.import_all(), locals(), globals() ) + + model_path = Path(__file__).parent / "S3FD.h5" + if not model_path.exists(): + return None + + self.model = nnlib.keras.models.load_model ( str(model_path) ) + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def extract (self, input_image, is_bgr=True): + + if is_bgr: + input_image = input_image[:,:,::-1] + is_bgr = False + + (h, w, ch) = input_image.shape + + d = max(w, h) + scale_to = 640 if d >= 1280 else d / 2 + scale_to = max(64, scale_to) + + input_scale = d / scale_to + input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR) + + olist = self.model.predict( np.expand_dims(input_image,0) ) + + detected_faces = [] + for ltrb in self.refine (olist): + l,t,r,b = [ x*input_scale for x in ltrb] + bt = b-t + if min(r-l,bt) < 40: #filtering faces < 40pix by any side + continue + b += bt*0.1 #enlarging bottom line a bit for 2DFAN-4, because default is not enough covering a chin + detected_faces.append ( [int(x) for x in (l,t,r,b) ] ) + + return detected_faces + + def refine(self, olist): + bboxlist = [] + for i, ((ocls,), (oreg,)) in enumerate ( zip ( olist[::2], olist[1::2] ) ): + stride = 2**(i + 2) # 4,8,16,32,64,128 + s_d2 = stride / 2 + s_m4 = stride * 4 + + for hindex, windex in zip(*np.where(ocls > 0.05)): + score = ocls[hindex, windex] + loc = oreg[hindex, windex, :] + priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4]) + priors_2p = priors[2:] + box = np.concatenate((priors[:2] + loc[:2] * 0.1 * priors_2p, + priors_2p * np.exp(loc[2:] * 0.2)) ) + box[:2] -= box[2:] / 2 + box[2:] += box[:2] + + bboxlist.append([*box, score]) + + bboxlist = np.array(bboxlist) + if len(bboxlist) == 0: + bboxlist = np.zeros((1, 5)) + + bboxlist = bboxlist[self.refine_nms(bboxlist, 0.3), :] + bboxlist = [ x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5] + return bboxlist + + def refine_nms(self, dets, thresh): + keep = list() + if len(dets) == 0: + return keep + + x_1, y_1, x_2, y_2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] + areas = (x_2 - x_1 + 1) * (y_2 - y_1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx_1, yy_1 = np.maximum(x_1[i], x_1[order[1:]]), np.maximum(y_1[i], y_1[order[1:]]) + xx_2, yy_2 = 
np.minimum(x_2[i], x_2[order[1:]]), np.minimum(y_2[i], y_2[order[1:]]) + + width, height = np.maximum(0.0, xx_2 - xx_1 + 1), np.maximum(0.0, yy_2 - yy_1 + 1) + ovr = width * height / (areas[i] + areas[order[1:]] - width * height) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + return keep diff --git a/facelib/__init__.py b/facelib/__init__.py index 1e89d43..89528ab 100644 --- a/facelib/__init__.py +++ b/facelib/__init__.py @@ -1,7 +1,7 @@ -from .FaceType import FaceType -from .DLIBExtractor import DLIBExtractor -from .MTCExtractor import MTCExtractor -from .S3FDExtractor import S3FDExtractor -from .LandmarksExtractor import LandmarksExtractor -from .FANSegmentator import FANSegmentator +from .FaceType import FaceType +from .DLIBExtractor import DLIBExtractor +from .MTCExtractor import MTCExtractor +from .S3FDExtractor import S3FDExtractor +from .LandmarksExtractor import LandmarksExtractor +from .FANSegmentator import FANSegmentator from .PoseEstimator import PoseEstimator \ No newline at end of file diff --git a/imagelib/DCSCN.py b/imagelib/DCSCN.py index 332e70a..8978211 100644 --- a/imagelib/DCSCN.py +++ b/imagelib/DCSCN.py @@ -1,164 +1,164 @@ -import numpy as np -import cv2 -from pathlib import Path -from nnlib import nnlib -from interact import interact as io - -class DCSCN(): - def __init__(self): - exec( nnlib.import_all(), locals(), globals() ) - - inp_x = KL.Input([None, None, 1]) - inp_x2 = KL.Input([None, None, 1]) - - x = inp_x - layers_count = 12 - layers = [] - for i in range(1,layers_count+1): - if i == 1: - output_feature_num = 196 - else: - x1 = (i-1) / float(layers_count - 1) - y1 = x1 ** (1.0 / 1.5) - output_feature_num = int((196 - 48) * (1 - y1) + 48) - x = Conv2D(output_feature_num, kernel_size=3, strides=1, padding='same', name='CNN%d' % (i) ) (x) - x = PReLU(shared_axes=[1,2], name='CNN%d_prelu' % (i) ) (x) - layers.append(x) - - x_concat = KL.Concatenate()(layers) - - A1 = Conv2D(64, kernel_size=1, strides=1, padding='same', name='A1' ) (x_concat) - A1 = PReLU(shared_axes=[1,2], name='A1_prelu') (A1) - - B1 = Conv2D(32, kernel_size=1, strides=1, padding='same', name='B1' ) (x_concat) - B1 = PReLU(shared_axes=[1,2], name='B1_prelu') (B1) - - B2 = Conv2D(32, kernel_size=3, strides=1, padding='same', name='B2' ) (B1) - B2 = PReLU(shared_axes=[1,2], name='B2_prelu') (B2) - - x = KL.Concatenate()([B2,A1]) - x = Conv2D(96*4, kernel_size=3, strides=1, padding='same', name='Up_PS' )(x) - x = PixelShuffler()(x) - x = Conv2D(1, kernel_size=3, strides=1, padding='same', name='R_CNN1', use_bias=False )(x) - x = KL.Add()([x, inp_x2]) - self.model = keras.models.Model ([inp_x, inp_x2], [x]) - self.model.load_weights ( Path(__file__).parent / 'DCSCN.h5' ) - - def upscale(self, img, is_bgr=True, is_float=True): - if is_bgr: - img = img[...,::-1] - - if is_float: - img = np.clip (img*255, 0, 255) - - img_shape_len = len(img.shape) - h, w = img.shape[:2] - ch = img.shape[2] if len(img.shape) >= 3 else 1 - - nh, nw = h*2, w*2 - - img_x = self.convert_rgb_to_y(img) - - img_bx = cv2.resize(img_x, (nh, nw), cv2.INTER_CUBIC) - - ensemble = 8 - - output = np.zeros([nh,nw,1], dtype=np.float32) - - for i in range(ensemble): - x = np.reshape( self.flip(img_x, i), (1,h,w,1) ) - bx = np.reshape( self.flip(img_bx, i), (1,nh,nw,1) ) - y = self.model.predict([x,bx])[0] - y = self.flip(y, i, invert=True) - output += y - - output /= ensemble - - bimg = cv2.resize(img, (nh, nw), cv2.INTER_CUBIC) - bimg_ycbcr = self.convert_rgb_to_ycbcr(bimg) - - if ch > 1: - output = 
self.convert_y_and_cbcr_to_rgb(output, bimg_ycbcr[:, :, 1:3]) - - if is_float: - output = np.clip (output/255.0, 0, 1.0) - - if is_bgr: - output = output[...,::-1] - - return output - - def convert_rgb_to_y(self, image): - if len(image.shape) <= 2 or image.shape[2] == 1: - return image - - xform = np.array([[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0]], dtype=np.float32) - y_image = image.dot(xform.T) + 16.0 - - return y_image - - - def convert_rgb_to_ycbcr(self, image): - if len(image.shape) <= 2 or image.shape[2] == 1: - return image - - xform = np.array( - [[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0], - [- 37.945 / 256.0, - 74.494 / 256.0, 112.439 / 256.0], - [112.439 / 256.0, - 94.154 / 256.0, - 18.285 / 256.0]], dtype=np.float32) - - ycbcr_image = image.dot(xform.T) - ycbcr_image[:, :, 0] += 16.0 - ycbcr_image[:, :, [1, 2]] += 128.0 - - return ycbcr_image - - def convert_ycbcr_to_rgb(self,ycbcr_image): - rgb_image = np.zeros([ycbcr_image.shape[0], ycbcr_image.shape[1], 3], dtype=np.float32) - - rgb_image[:, :, 0] = ycbcr_image[:, :, 0] - 16.0 - rgb_image[:, :, [1, 2]] = ycbcr_image[:, :, [1, 2]] - 128.0 - xform = np.array( - [[298.082 / 256.0, 0, 408.583 / 256.0], - [298.082 / 256.0, -100.291 / 256.0, -208.120 / 256.0], - [298.082 / 256.0, 516.412 / 256.0, 0]], dtype=np.float32) - rgb_image = rgb_image.dot(xform.T) - - return rgb_image - - def convert_y_and_cbcr_to_rgb(self,y_image, cbcr_image): - if len(y_image.shape) <= 2: - y_image = y_image.reshape[y_image.shape[0], y_image.shape[1], 1] - - if len(y_image.shape) == 3 and y_image.shape[2] == 3: - y_image = y_image[:, :, 0:1] - - ycbcr_image = np.zeros([y_image.shape[0], y_image.shape[1], 3], dtype=np.float32) - ycbcr_image[:, :, 0] = y_image[:, :, 0] - ycbcr_image[:, :, 1:3] = cbcr_image[:, :, 0:2] - - return self.convert_ycbcr_to_rgb(ycbcr_image) - - def flip(self, image, flip_type, invert=False): - if flip_type == 0: - return image - elif flip_type == 1: - return np.flipud(image) - elif flip_type == 2: - return np.fliplr(image) - elif flip_type == 3: - return np.flipud(np.fliplr(image)) - elif flip_type == 4: - return np.rot90(image, 1 if invert is False else -1) - elif flip_type == 5: - return np.rot90(image, -1 if invert is False else 1) - elif flip_type == 6: - if invert is False: - return np.flipud(np.rot90(image)) - else: - return np.rot90(np.flipud(image), -1) - elif flip_type == 7: - if invert is False: - return np.flipud(np.rot90(image, -1)) - else: - return np.rot90(np.flipud(image), 1) +import numpy as np +import cv2 +from pathlib import Path +from nnlib import nnlib +from interact import interact as io + +class DCSCN(): + def __init__(self): + exec( nnlib.import_all(), locals(), globals() ) + + inp_x = KL.Input([None, None, 1]) + inp_x2 = KL.Input([None, None, 1]) + + x = inp_x + layers_count = 12 + layers = [] + for i in range(1,layers_count+1): + if i == 1: + output_feature_num = 196 + else: + x1 = (i-1) / float(layers_count - 1) + y1 = x1 ** (1.0 / 1.5) + output_feature_num = int((196 - 48) * (1 - y1) + 48) + x = Conv2D(output_feature_num, kernel_size=3, strides=1, padding='same', name='CNN%d' % (i) ) (x) + x = PReLU(shared_axes=[1,2], name='CNN%d_prelu' % (i) ) (x) + layers.append(x) + + x_concat = KL.Concatenate()(layers) + + A1 = Conv2D(64, kernel_size=1, strides=1, padding='same', name='A1' ) (x_concat) + A1 = PReLU(shared_axes=[1,2], name='A1_prelu') (A1) + + B1 = Conv2D(32, kernel_size=1, strides=1, padding='same', name='B1' ) (x_concat) + B1 = PReLU(shared_axes=[1,2], name='B1_prelu') (B1) + + B2 = 
Conv2D(32, kernel_size=3, strides=1, padding='same', name='B2' ) (B1)
+        B2 = PReLU(shared_axes=[1,2], name='B2_prelu') (B2)
+
+        x = KL.Concatenate()([B2,A1])
+        x = Conv2D(96*4, kernel_size=3, strides=1, padding='same', name='Up_PS' )(x)
+        x = PixelShuffler()(x)
+        x = Conv2D(1, kernel_size=3, strides=1, padding='same', name='R_CNN1', use_bias=False )(x)
+        x = KL.Add()([x, inp_x2])
+        self.model = keras.models.Model ([inp_x, inp_x2], [x])
+        self.model.load_weights ( Path(__file__).parent / 'DCSCN.h5' )
+
+    def upscale(self, img, is_bgr=True, is_float=True):
+        if is_bgr:
+            img = img[...,::-1]
+
+        if is_float:
+            img = np.clip (img*255, 0, 255)
+
+        img_shape_len = len(img.shape)
+        h, w = img.shape[:2]
+        ch = img.shape[2] if len(img.shape) >= 3 else 1
+
+        nh, nw = h*2, w*2
+
+        img_x = self.convert_rgb_to_y(img)
+
+        img_bx = cv2.resize(img_x, (nh, nw), interpolation=cv2.INTER_CUBIC)
+
+        ensemble = 8
+
+        output = np.zeros([nh,nw,1], dtype=np.float32)
+
+        for i in range(ensemble):
+            x = np.reshape( self.flip(img_x, i), (1,h,w,1) )
+            bx = np.reshape( self.flip(img_bx, i), (1,nh,nw,1) )
+            y = self.model.predict([x,bx])[0]
+            y = self.flip(y, i, invert=True)
+            output += y
+
+        output /= ensemble
+
+        bimg = cv2.resize(img, (nh, nw), interpolation=cv2.INTER_CUBIC)
+        bimg_ycbcr = self.convert_rgb_to_ycbcr(bimg)
+
+        if ch > 1:
+            output = self.convert_y_and_cbcr_to_rgb(output, bimg_ycbcr[:, :, 1:3])
+
+        if is_float:
+            output = np.clip (output/255.0, 0, 1.0)
+
+        if is_bgr:
+            output = output[...,::-1]
+
+        return output
+
+    def convert_rgb_to_y(self, image):
+        if len(image.shape) <= 2 or image.shape[2] == 1:
+            return image
+
+        xform = np.array([[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0]], dtype=np.float32)
+        y_image = image.dot(xform.T) + 16.0
+
+        return y_image
+
+
+    def convert_rgb_to_ycbcr(self, image):
+        if len(image.shape) <= 2 or image.shape[2] == 1:
+            return image
+
+        xform = np.array(
+            [[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0],
+             [- 37.945 / 256.0, - 74.494 / 256.0, 112.439 / 256.0],
+             [112.439 / 256.0, - 94.154 / 256.0, - 18.285 / 256.0]], dtype=np.float32)
+
+        ycbcr_image = image.dot(xform.T)
+        ycbcr_image[:, :, 0] += 16.0
+        ycbcr_image[:, :, [1, 2]] += 128.0
+
+        return ycbcr_image
+
+    def convert_ycbcr_to_rgb(self,ycbcr_image):
+        rgb_image = np.zeros([ycbcr_image.shape[0], ycbcr_image.shape[1], 3], dtype=np.float32)
+
+        rgb_image[:, :, 0] = ycbcr_image[:, :, 0] - 16.0
+        rgb_image[:, :, [1, 2]] = ycbcr_image[:, :, [1, 2]] - 128.0
+        xform = np.array(
+            [[298.082 / 256.0, 0, 408.583 / 256.0],
+             [298.082 / 256.0, -100.291 / 256.0, -208.120 / 256.0],
+             [298.082 / 256.0, 516.412 / 256.0, 0]], dtype=np.float32)
+        rgb_image = rgb_image.dot(xform.T)
+
+        return rgb_image
+
+    def convert_y_and_cbcr_to_rgb(self,y_image, cbcr_image):
+        if len(y_image.shape) <= 2:
+            y_image = y_image.reshape(y_image.shape[0], y_image.shape[1], 1)
+
+        if len(y_image.shape) == 3 and y_image.shape[2] == 3:
+            y_image = y_image[:, :, 0:1]
+
+        ycbcr_image = np.zeros([y_image.shape[0], y_image.shape[1], 3], dtype=np.float32)
+        ycbcr_image[:, :, 0] = y_image[:, :, 0]
+        ycbcr_image[:, :, 1:3] = cbcr_image[:, :, 0:2]
+
+        return self.convert_ycbcr_to_rgb(ycbcr_image)
+
+    def flip(self, image, flip_type, invert=False):
+        if flip_type == 0:
+            return image
+        elif flip_type == 1:
+            return np.flipud(image)
+        elif flip_type == 2:
+            return np.fliplr(image)
+        elif flip_type == 3:
+            return np.flipud(np.fliplr(image))
+        elif flip_type == 4:
+            return np.rot90(image, 1 if invert is False else -1)
+        elif flip_type == 5:
+            return np.rot90(image, -1 if invert is False 
else 1) + elif flip_type == 6: + if invert is False: + return np.flipud(np.rot90(image)) + else: + return np.rot90(np.flipud(image), -1) + elif flip_type == 7: + if invert is False: + return np.flipud(np.rot90(image, -1)) + else: + return np.rot90(np.flipud(image), 1) diff --git a/imagelib/IEPolys.py b/imagelib/IEPolys.py index 820a229..daeef28 100644 --- a/imagelib/IEPolys.py +++ b/imagelib/IEPolys.py @@ -1,104 +1,104 @@ -import numpy as np -import cv2 - -class IEPolysPoints: - def __init__(self, IEPolys_parent, type): - self.parent = IEPolys_parent - self.type = type - self.points = np.empty( (0,2), dtype=np.int32 ) - self.n_max = self.n = 0 - - def add(self,x,y): - self.points = np.append(self.points[0:self.n], [ (x,y) ], axis=0) - self.n_max = self.n = self.n + 1 - self.parent.dirty = True - - def n_dec(self): - self.n = max(0, self.n-1) - self.parent.dirty = True - return self.n - - def n_inc(self): - self.n = min(len(self.points), self.n+1) - self.parent.dirty = True - return self.n - - def n_clip(self): - self.points = self.points[0:self.n] - self.n_max = self.n - - def cur_point(self): - return self.points[self.n-1] - - def points_to_n(self): - return self.points[0:self.n] - - def set_points(self, points): - self.points = np.array(points) - self.n_max = self.n = len(points) - self.parent.dirty = True - -class IEPolys: - def __init__(self): - self.list = [] - self.n_max = self.n = 0 - self.dirty = True - - def add(self, type): - self.list = self.list[0:self.n] - self.list.append ( IEPolysPoints(self, type) ) - self.n_max = self.n = self.n + 1 - self.dirty = True - - def n_dec(self): - self.n = max(0, self.n-1) - self.dirty = True - return self.n - - def n_inc(self): - self.n = min(len(self.list), self.n+1) - self.dirty = True - return self.n - - def n_list(self): - return self.list[self.n-1] - - def n_clip(self): - self.list = self.list[0:self.n] - self.n_max = self.n - if self.n > 0: - self.list[-1].n_clip() - - def __iter__(self): - for n in range(self.n): - yield self.list[n] - - def switch_dirty(self): - d = self.dirty - self.dirty = False - return d - - def overlay_mask(self, mask): - h,w,c = mask.shape - white = (1,)*c - black = (0,)*c - for n in range(self.n): - poly = self.list[n] - if poly.n > 0: - cv2.fillPoly(mask, [poly.points_to_n()], white if poly.type == 1 else black ) - - def dump(self): - result = [] - for n in range(self.n): - l = self.list[n] - result += [ (l.type, l.points_to_n().tolist() ) ] - return result - - @staticmethod - def load(ie_polys=None): - obj = IEPolys() - if ie_polys is not None: - for (type, points) in ie_polys: - obj.add(type) - obj.n_list().set_points(points) +import numpy as np +import cv2 + +class IEPolysPoints: + def __init__(self, IEPolys_parent, type): + self.parent = IEPolys_parent + self.type = type + self.points = np.empty( (0,2), dtype=np.int32 ) + self.n_max = self.n = 0 + + def add(self,x,y): + self.points = np.append(self.points[0:self.n], [ (x,y) ], axis=0) + self.n_max = self.n = self.n + 1 + self.parent.dirty = True + + def n_dec(self): + self.n = max(0, self.n-1) + self.parent.dirty = True + return self.n + + def n_inc(self): + self.n = min(len(self.points), self.n+1) + self.parent.dirty = True + return self.n + + def n_clip(self): + self.points = self.points[0:self.n] + self.n_max = self.n + + def cur_point(self): + return self.points[self.n-1] + + def points_to_n(self): + return self.points[0:self.n] + + def set_points(self, points): + self.points = np.array(points) + self.n_max = self.n = len(points) + self.parent.dirty = 
True + +class IEPolys: + def __init__(self): + self.list = [] + self.n_max = self.n = 0 + self.dirty = True + + def add(self, type): + self.list = self.list[0:self.n] + self.list.append ( IEPolysPoints(self, type) ) + self.n_max = self.n = self.n + 1 + self.dirty = True + + def n_dec(self): + self.n = max(0, self.n-1) + self.dirty = True + return self.n + + def n_inc(self): + self.n = min(len(self.list), self.n+1) + self.dirty = True + return self.n + + def n_list(self): + return self.list[self.n-1] + + def n_clip(self): + self.list = self.list[0:self.n] + self.n_max = self.n + if self.n > 0: + self.list[-1].n_clip() + + def __iter__(self): + for n in range(self.n): + yield self.list[n] + + def switch_dirty(self): + d = self.dirty + self.dirty = False + return d + + def overlay_mask(self, mask): + h,w,c = mask.shape + white = (1,)*c + black = (0,)*c + for n in range(self.n): + poly = self.list[n] + if poly.n > 0: + cv2.fillPoly(mask, [poly.points_to_n()], white if poly.type == 1 else black ) + + def dump(self): + result = [] + for n in range(self.n): + l = self.list[n] + result += [ (l.type, l.points_to_n().tolist() ) ] + return result + + @staticmethod + def load(ie_polys=None): + obj = IEPolys() + if ie_polys is not None: + for (type, points) in ie_polys: + obj.add(type) + obj.n_list().set_points(points) return obj \ No newline at end of file diff --git a/imagelib/__init__.py b/imagelib/__init__.py index 14ed304..3436f56 100644 --- a/imagelib/__init__.py +++ b/imagelib/__init__.py @@ -1,27 +1,27 @@ -from .estimate_sharpness import estimate_sharpness -from .equalize_and_stack_square import equalize_and_stack_square - -from .text import get_text_image -from .text import get_draw_text_lines - -from .draw import draw_polygon -from .draw import draw_rect - -from .morph import morph_by_points - -from .warp import gen_warp_params -from .warp import warp_by_params - -from .reduce_colors import reduce_colors - -from .color_transfer import color_hist_match -from .color_transfer import reinhard_color_transfer -from .color_transfer import linear_color_transfer - -from .DCSCN import DCSCN - -from .common import normalize_channels - -from .IEPolys import IEPolys - +from .estimate_sharpness import estimate_sharpness +from .equalize_and_stack_square import equalize_and_stack_square + +from .text import get_text_image +from .text import get_draw_text_lines + +from .draw import draw_polygon +from .draw import draw_rect + +from .morph import morph_by_points + +from .warp import gen_warp_params +from .warp import warp_by_params + +from .reduce_colors import reduce_colors + +from .color_transfer import color_hist_match +from .color_transfer import reinhard_color_transfer +from .color_transfer import linear_color_transfer + +from .DCSCN import DCSCN + +from .common import normalize_channels + +from .IEPolys import IEPolys + from .blur import LinearMotionBlur \ No newline at end of file diff --git a/imagelib/blur.py b/imagelib/blur.py index e12ccfd..54c7199 100644 --- a/imagelib/blur.py +++ b/imagelib/blur.py @@ -1,143 +1,143 @@ -import math -import numpy as np -from PIL import Image -from scipy.signal import convolve2d -from skimage.draw import line - -class LineDictionary: - def __init__(self): - self.lines = {} - self.Create3x3Lines() - self.Create5x5Lines() - self.Create7x7Lines() - self.Create9x9Lines() - return - - def Create3x3Lines(self): - lines = {} - lines[0] = [1,0,1,2] - lines[45] = [2,0,0,2] - lines[90] = [0,1,2,1] - lines[135] = [0,0,2,2] - self.lines[3] = lines - return - - def 
Create5x5Lines(self): - lines = {} - lines[0] = [2,0,2,4] - lines[22.5] = [3,0,1,4] - lines[45] = [0,4,4,0] - lines[67.5] = [0,3,4,1] - lines[90] = [0,2,4,2] - lines[112.5] = [0,1,4,3] - lines[135] = [0,0,4,4] - lines[157.5]= [1,0,3,4] - self.lines[5] = lines - return - - def Create7x7Lines(self): - lines = {} - lines[0] = [3,0,3,6] - lines[15] = [4,0,2,6] - lines[30] = [5,0,1,6] - lines[45] = [6,0,0,6] - lines[60] = [6,1,0,5] - lines[75] = [6,2,0,4] - lines[90] = [0,3,6,3] - lines[105] = [0,2,6,4] - lines[120] = [0,1,6,5] - lines[135] = [0,0,6,6] - lines[150] = [1,0,5,6] - lines[165] = [2,0,4,6] - self.lines[7] = lines - return - - def Create9x9Lines(self): - lines = {} - lines[0] = [4,0,4,8] - lines[11.25] = [5,0,3,8] - lines[22.5] = [6,0,2,8] - lines[33.75] = [7,0,1,8] - lines[45] = [8,0,0,8] - lines[56.25] = [8,1,0,7] - lines[67.5] = [8,2,0,6] - lines[78.75] = [8,3,0,5] - lines[90] = [8,4,0,4] - lines[101.25] = [0,3,8,5] - lines[112.5] = [0,2,8,6] - lines[123.75] = [0,1,8,7] - lines[135] = [0,0,8,8] - lines[146.25] = [1,0,7,8] - lines[157.5] = [2,0,6,8] - lines[168.75] = [3,0,5,8] - self.lines[9] = lines - return - -lineLengths =[3,5,7,9] -lineTypes = ["full", "right", "left"] - -lineDict = LineDictionary() - -def LinearMotionBlur_random(img): - lineLengthIdx = np.random.randint(0, len(lineLengths)) - lineTypeIdx = np.random.randint(0, len(lineTypes)) - lineLength = lineLengths[lineLengthIdx] - lineType = lineTypes[lineTypeIdx] - lineAngle = randomAngle(lineLength) - return LinearMotionBlur(img, lineLength, lineAngle, lineType) - -def LinearMotionBlur(img, dim, angle, linetype='full'): - if len(img.shape) == 2: - h, w = img.shape - c = 1 - img = img[...,np.newaxis] - elif len(img.shape) == 3: - h,w,c = img.shape - else: - raise ValueError('unsupported img.shape') - - kernel = LineKernel(dim, angle, linetype) - - imgs = [] - for i in range(c): - imgs.append ( convolve2d(img[...,i], kernel, mode='same') ) - - img = np.stack(imgs, axis=-1) - img = np.squeeze(img) - return img - -def LineKernel(dim, angle, linetype): - kernelwidth = dim - kernelCenter = int(math.floor(dim/2)) - angle = SanitizeAngleValue(kernelCenter, angle) - kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32) - lineAnchors = lineDict.lines[dim][angle] - if(linetype == 'right'): - lineAnchors[0] = kernelCenter - lineAnchors[1] = kernelCenter - if(linetype == 'left'): - lineAnchors[2] = kernelCenter - lineAnchors[3] = kernelCenter - rr,cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3]) - kernel[rr,cc]=1 - normalizationFactor = np.count_nonzero(kernel) - kernel = kernel / normalizationFactor - return kernel - -def SanitizeAngleValue(kernelCenter, angle): - numDistinctLines = kernelCenter * 4 - angle = math.fmod(angle, 180.0) - validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) - angle = nearestValue(angle, validLineAngles) - return angle - -def nearestValue(theta, validAngles): - idx = (np.abs(validAngles-theta)).argmin() - return validAngles[idx] - -def randomAngle(kerneldim): - kernelCenter = int(math.floor(kerneldim/2)) - numDistinctLines = kernelCenter * 4 - validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) - angleIdx = np.random.randint(0, len(validLineAngles)) +import math +import numpy as np +from PIL import Image +from scipy.signal import convolve2d +from skimage.draw import line + +class LineDictionary: + def __init__(self): + self.lines = {} + self.Create3x3Lines() + self.Create5x5Lines() + self.Create7x7Lines() + self.Create9x9Lines() 
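+        # Editorial note, not part of the original patch: each [r0, c0, r1, c1]
+        # entry in the tables built by the Create*Lines() methods gives the two
+        # endpoints of a line through the kernel center, keyed by its angle in
+        # degrees. A minimal sketch of how LineKernel() below rasterizes one of
+        # them into a normalized motion-blur kernel (assuming numpy and skimage):
+        #
+        #   import numpy as np
+        #   from skimage.draw import line
+        #   r0, c0, r1, c1 = [0, 4, 4, 0]        # the 45-degree entry for dim=5
+        #   kernel = np.zeros((5, 5), np.float32)
+        #   rr, cc = line(r0, c0, r1, c1)        # pixel coordinates on the line
+        #   kernel[rr, cc] = 1
+        #   kernel /= np.count_nonzero(kernel)   # sums to 1, preserving brightness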
+ return + + def Create3x3Lines(self): + lines = {} + lines[0] = [1,0,1,2] + lines[45] = [2,0,0,2] + lines[90] = [0,1,2,1] + lines[135] = [0,0,2,2] + self.lines[3] = lines + return + + def Create5x5Lines(self): + lines = {} + lines[0] = [2,0,2,4] + lines[22.5] = [3,0,1,4] + lines[45] = [0,4,4,0] + lines[67.5] = [0,3,4,1] + lines[90] = [0,2,4,2] + lines[112.5] = [0,1,4,3] + lines[135] = [0,0,4,4] + lines[157.5]= [1,0,3,4] + self.lines[5] = lines + return + + def Create7x7Lines(self): + lines = {} + lines[0] = [3,0,3,6] + lines[15] = [4,0,2,6] + lines[30] = [5,0,1,6] + lines[45] = [6,0,0,6] + lines[60] = [6,1,0,5] + lines[75] = [6,2,0,4] + lines[90] = [0,3,6,3] + lines[105] = [0,2,6,4] + lines[120] = [0,1,6,5] + lines[135] = [0,0,6,6] + lines[150] = [1,0,5,6] + lines[165] = [2,0,4,6] + self.lines[7] = lines + return + + def Create9x9Lines(self): + lines = {} + lines[0] = [4,0,4,8] + lines[11.25] = [5,0,3,8] + lines[22.5] = [6,0,2,8] + lines[33.75] = [7,0,1,8] + lines[45] = [8,0,0,8] + lines[56.25] = [8,1,0,7] + lines[67.5] = [8,2,0,6] + lines[78.75] = [8,3,0,5] + lines[90] = [8,4,0,4] + lines[101.25] = [0,3,8,5] + lines[112.5] = [0,2,8,6] + lines[123.75] = [0,1,8,7] + lines[135] = [0,0,8,8] + lines[146.25] = [1,0,7,8] + lines[157.5] = [2,0,6,8] + lines[168.75] = [3,0,5,8] + self.lines[9] = lines + return + +lineLengths =[3,5,7,9] +lineTypes = ["full", "right", "left"] + +lineDict = LineDictionary() + +def LinearMotionBlur_random(img): + lineLengthIdx = np.random.randint(0, len(lineLengths)) + lineTypeIdx = np.random.randint(0, len(lineTypes)) + lineLength = lineLengths[lineLengthIdx] + lineType = lineTypes[lineTypeIdx] + lineAngle = randomAngle(lineLength) + return LinearMotionBlur(img, lineLength, lineAngle, lineType) + +def LinearMotionBlur(img, dim, angle, linetype='full'): + if len(img.shape) == 2: + h, w = img.shape + c = 1 + img = img[...,np.newaxis] + elif len(img.shape) == 3: + h,w,c = img.shape + else: + raise ValueError('unsupported img.shape') + + kernel = LineKernel(dim, angle, linetype) + + imgs = [] + for i in range(c): + imgs.append ( convolve2d(img[...,i], kernel, mode='same') ) + + img = np.stack(imgs, axis=-1) + img = np.squeeze(img) + return img + +def LineKernel(dim, angle, linetype): + kernelwidth = dim + kernelCenter = int(math.floor(dim/2)) + angle = SanitizeAngleValue(kernelCenter, angle) + kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32) + lineAnchors = lineDict.lines[dim][angle] + if(linetype == 'right'): + lineAnchors[0] = kernelCenter + lineAnchors[1] = kernelCenter + if(linetype == 'left'): + lineAnchors[2] = kernelCenter + lineAnchors[3] = kernelCenter + rr,cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3]) + kernel[rr,cc]=1 + normalizationFactor = np.count_nonzero(kernel) + kernel = kernel / normalizationFactor + return kernel + +def SanitizeAngleValue(kernelCenter, angle): + numDistinctLines = kernelCenter * 4 + angle = math.fmod(angle, 180.0) + validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) + angle = nearestValue(angle, validLineAngles) + return angle + +def nearestValue(theta, validAngles): + idx = (np.abs(validAngles-theta)).argmin() + return validAngles[idx] + +def randomAngle(kerneldim): + kernelCenter = int(math.floor(kerneldim/2)) + numDistinctLines = kernelCenter * 4 + validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) + angleIdx = np.random.randint(0, len(validLineAngles)) return int(validLineAngles[angleIdx]) \ No newline at end of file diff --git 
a/imagelib/color_transfer.py b/imagelib/color_transfer.py index eb66074..1a7f1dd 100644 --- a/imagelib/color_transfer.py +++ b/imagelib/color_transfer.py @@ -1,191 +1,191 @@ -import numpy as np -import cv2 - -def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None): - """ - Transfers the color distribution from the source to the target - image using the mean and standard deviations of the L*a*b* - color space. - - This implementation is (loosely) based on to the "Color Transfer - between Images" paper by Reinhard et al., 2001. - - Parameters: - ------- - source: NumPy array - OpenCV image in BGR color space (the source image) - target: NumPy array - OpenCV image in BGR color space (the target image) - clip: Should components of L*a*b* image be scaled by np.clip before - converting back to BGR color space? - If False then components will be min-max scaled appropriately. - Clipping will keep target image brightness truer to the input. - Scaling will adjust image brightness to avoid washed out portions - in the resulting color transfer that can be caused by clipping. - preserve_paper: Should color transfer strictly follow methodology - layed out in original paper? The method does not always produce - aesthetically pleasing results. - If False then L*a*b* components will scaled using the reciprocal of - the scaling factor proposed in the paper. This method seems to produce - more consistently aesthetically pleasing results - - Returns: - ------- - transfer: NumPy array - OpenCV image (w, h, 3) NumPy array (uint8) - """ - - - # convert the images from the RGB to L*ab* color space, being - # sure to utilizing the floating point data type (note: OpenCV - # expects floats to be 32-bit, so use that instead of 64-bit) - source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32) - target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32) - - # compute color statistics for the source and target images - src_input = source if source_mask is None else source*source_mask - tgt_input = target if target_mask is None else target*target_mask - (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = lab_image_stats(src_input) - (lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = lab_image_stats(tgt_input) - - # subtract the means from the target image - (l, a, b) = cv2.split(target) - l -= lMeanTar - a -= aMeanTar - b -= bMeanTar - - if preserve_paper: - # scale by the standard deviations using paper proposed factor - l = (lStdTar / lStdSrc) * l - a = (aStdTar / aStdSrc) * a - b = (bStdTar / bStdSrc) * b - else: - # scale by the standard deviations using reciprocal of paper proposed factor - l = (lStdSrc / lStdTar) * l - a = (aStdSrc / aStdTar) * a - b = (bStdSrc / bStdTar) * b - - # add in the source mean - l += lMeanSrc - a += aMeanSrc - b += bMeanSrc - - # clip/scale the pixel intensities to [0, 255] if they fall - # outside this range - l = _scale_array(l, clip=clip) - a = _scale_array(a, clip=clip) - b = _scale_array(b, clip=clip) - - # merge the channels together and convert back to the RGB color - # space, being sure to utilize the 8-bit unsigned integer data - # type - transfer = cv2.merge([l, a, b]) - transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR) - - # return the color transferred image - return transfer - -def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5): - ''' - Matches the colour distribution of the target image to that of the source image - using a linear 
transform. - Images are expected to be of form (w,h,c) and float in [0,1]. - Modes are chol, pca or sym for different choices of basis. - ''' - mu_t = target_img.mean(0).mean(0) - t = target_img - mu_t - t = t.transpose(2,0,1).reshape(3,-1) - Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0]) - mu_s = source_img.mean(0).mean(0) - s = source_img - mu_s - s = s.transpose(2,0,1).reshape(3,-1) - Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0]) - if mode == 'chol': - chol_t = np.linalg.cholesky(Ct) - chol_s = np.linalg.cholesky(Cs) - ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t) - if mode == 'pca': - eva_t, eve_t = np.linalg.eigh(Ct) - Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) - eva_s, eve_s = np.linalg.eigh(Cs) - Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T) - ts = Qs.dot(np.linalg.inv(Qt)).dot(t) - if mode == 'sym': - eva_t, eve_t = np.linalg.eigh(Ct) - Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) - Qt_Cs_Qt = Qt.dot(Cs).dot(Qt) - eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt) - QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T) - ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t) - matched_img = ts.reshape(*target_img.transpose(2,0,1).shape).transpose(1,2,0) - matched_img += mu_s - matched_img[matched_img>1] = 1 - matched_img[matched_img<0] = 0 - return matched_img - -def lab_image_stats(image): - # compute the mean and standard deviation of each channel - (l, a, b) = cv2.split(image) - (lMean, lStd) = (l.mean(), l.std()) - (aMean, aStd) = (a.mean(), a.std()) - (bMean, bStd) = (b.mean(), b.std()) - - # return the color statistics - return (lMean, lStd, aMean, aStd, bMean, bStd) - -def _scale_array(arr, clip=True): - if clip: - return np.clip(arr, 0, 255) - - mn = arr.min() - mx = arr.max() - scale_range = (max([mn, 0]), min([mx, 255])) - - if mn < scale_range[0] or mx > scale_range[1]: - return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0] - - return arr - -def channel_hist_match(source, template, hist_match_threshold=255, mask=None): - # Code borrowed from: - # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x - masked_source = source - masked_template = template - - if mask is not None: - masked_source = source * mask - masked_template = template * mask - - oldshape = source.shape - source = source.ravel() - template = template.ravel() - masked_source = masked_source.ravel() - masked_template = masked_template.ravel() - s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, - return_counts=True) - t_values, t_counts = np.unique(template, return_counts=True) - ms_values, mbin_idx, ms_counts = np.unique(source, return_inverse=True, - return_counts=True) - mt_values, mt_counts = np.unique(template, return_counts=True) - - s_quantiles = np.cumsum(s_counts).astype(np.float64) - s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1] - t_quantiles = np.cumsum(t_counts).astype(np.float64) - t_quantiles = 255 * t_quantiles / t_quantiles[-1] - interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) - - return interp_t_values[bin_idx].reshape(oldshape) - -def color_hist_match(src_im, tar_im, hist_match_threshold=255): - h,w,c = src_im.shape - matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], hist_match_threshold, None) - matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], hist_match_threshold, None) - matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], hist_match_threshold, None) - - to_stack 
= (matched_R, matched_G, matched_B)
-    for i in range(3, c):
-        to_stack += ( src_im[:,:,i],)
-
-
-    matched = np.stack(to_stack, axis=-1).astype(src_im.dtype)
-    return matched
+import numpy as np
+import cv2
+
+def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None):
+    """
+    Transfers the color distribution from the source to the target
+    image using the mean and standard deviations of the L*a*b*
+    color space.
+
+    This implementation is (loosely) based on the "Color Transfer
+    between Images" paper by Reinhard et al., 2001.
+
+    Parameters:
+    -------
+    source: NumPy array
+        OpenCV image in BGR color space (the source image)
+    target: NumPy array
+        OpenCV image in BGR color space (the target image)
+    clip: Should components of the L*a*b* image be scaled by np.clip before
+        converting back to BGR color space?
+        If False then components will be min-max scaled appropriately.
+        Clipping will keep target image brightness truer to the input.
+        Scaling will adjust image brightness to avoid washed out portions
+        in the resulting color transfer that can be caused by clipping.
+    preserve_paper: Should color transfer strictly follow the methodology
+        laid out in the original paper? The method does not always produce
+        aesthetically pleasing results.
+        If False then L*a*b* components will be scaled using the reciprocal of
+        the scaling factor proposed in the paper. This method seems to produce
+        more consistently aesthetically pleasing results.
+
+    Returns:
+    -------
+    transfer: NumPy array
+        OpenCV image (w, h, 3) NumPy array (uint8)
+    """
+
+
+    # convert the images from the BGR to L*a*b* color space, being
+    # sure to utilize the floating point data type (note: OpenCV
+    # expects floats to be 32-bit, so use that instead of 64-bit)
+    source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
+    target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32)
+
+    # compute color statistics for the source and target images
+    src_input = source if source_mask is None else source*source_mask
+    tgt_input = target if target_mask is None else target*target_mask
+    (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = lab_image_stats(src_input)
+    (lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = lab_image_stats(tgt_input)
+
+    # subtract the means from the target image
+    (l, a, b) = cv2.split(target)
+    l -= lMeanTar
+    a -= aMeanTar
+    b -= bMeanTar
+
+    if preserve_paper:
+        # scale by the standard deviations using paper proposed factor
+        l = (lStdTar / lStdSrc) * l
+        a = (aStdTar / aStdSrc) * a
+        b = (bStdTar / bStdSrc) * b
+    else:
+        # scale by the standard deviations using reciprocal of paper proposed factor
+        l = (lStdSrc / lStdTar) * l
+        a = (aStdSrc / aStdTar) * a
+        b = (bStdSrc / bStdTar) * b
+
+    # add in the source mean
+    l += lMeanSrc
+    a += aMeanSrc
+    b += bMeanSrc
+
+    # clip/scale the pixel intensities to [0, 255] if they fall
+    # outside this range
+    l = _scale_array(l, clip=clip)
+    a = _scale_array(a, clip=clip)
+    b = _scale_array(b, clip=clip)
+
+    # merge the channels together and convert back to the BGR color
+    # space, being sure to utilize the 8-bit unsigned integer data
+    # type
+    transfer = cv2.merge([l, a, b])
+    transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR)
+
+    # return the color transferred image
+    return transfer
+
+def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
+    '''
+    Matches the colour distribution of the target image to that of the source image
+    using a linear 
transform. + Images are expected to be of form (w,h,c) and float in [0,1]. + Modes are chol, pca or sym for different choices of basis. + ''' + mu_t = target_img.mean(0).mean(0) + t = target_img - mu_t + t = t.transpose(2,0,1).reshape(3,-1) + Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0]) + mu_s = source_img.mean(0).mean(0) + s = source_img - mu_s + s = s.transpose(2,0,1).reshape(3,-1) + Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0]) + if mode == 'chol': + chol_t = np.linalg.cholesky(Ct) + chol_s = np.linalg.cholesky(Cs) + ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t) + if mode == 'pca': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + eva_s, eve_s = np.linalg.eigh(Cs) + Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T) + ts = Qs.dot(np.linalg.inv(Qt)).dot(t) + if mode == 'sym': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + Qt_Cs_Qt = Qt.dot(Cs).dot(Qt) + eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt) + QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T) + ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t) + matched_img = ts.reshape(*target_img.transpose(2,0,1).shape).transpose(1,2,0) + matched_img += mu_s + matched_img[matched_img>1] = 1 + matched_img[matched_img<0] = 0 + return matched_img + +def lab_image_stats(image): + # compute the mean and standard deviation of each channel + (l, a, b) = cv2.split(image) + (lMean, lStd) = (l.mean(), l.std()) + (aMean, aStd) = (a.mean(), a.std()) + (bMean, bStd) = (b.mean(), b.std()) + + # return the color statistics + return (lMean, lStd, aMean, aStd, bMean, bStd) + +def _scale_array(arr, clip=True): + if clip: + return np.clip(arr, 0, 255) + + mn = arr.min() + mx = arr.max() + scale_range = (max([mn, 0]), min([mx, 255])) + + if mn < scale_range[0] or mx > scale_range[1]: + return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0] + + return arr + +def channel_hist_match(source, template, hist_match_threshold=255, mask=None): + # Code borrowed from: + # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x + masked_source = source + masked_template = template + + if mask is not None: + masked_source = source * mask + masked_template = template * mask + + oldshape = source.shape + source = source.ravel() + template = template.ravel() + masked_source = masked_source.ravel() + masked_template = masked_template.ravel() + s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, + return_counts=True) + t_values, t_counts = np.unique(template, return_counts=True) + ms_values, mbin_idx, ms_counts = np.unique(source, return_inverse=True, + return_counts=True) + mt_values, mt_counts = np.unique(template, return_counts=True) + + s_quantiles = np.cumsum(s_counts).astype(np.float64) + s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1] + t_quantiles = np.cumsum(t_counts).astype(np.float64) + t_quantiles = 255 * t_quantiles / t_quantiles[-1] + interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) + + return interp_t_values[bin_idx].reshape(oldshape) + +def color_hist_match(src_im, tar_im, hist_match_threshold=255): + h,w,c = src_im.shape + matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], hist_match_threshold, None) + matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], hist_match_threshold, None) + matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], hist_match_threshold, None) + + to_stack 
= (matched_R, matched_G, matched_B) + for i in range(3, c): + to_stack += ( src_im[:,:,i],) + + + matched = np.stack(to_stack, axis=-1).astype(src_im.dtype) + return matched diff --git a/imagelib/common.py b/imagelib/common.py index 229387f..e63c998 100644 --- a/imagelib/common.py +++ b/imagelib/common.py @@ -1,21 +1,21 @@ -import numpy as np - -def normalize_channels(img, target_channels): - img_shape_len = len(img.shape) - if img_shape_len == 2: - h, w = img.shape - c = 0 - elif img_shape_len == 3: - h, w, c = img.shape - else: - raise ValueError("normalize: incorrect image dimensions.") - - if c == 0 and target_channels > 0: - img = img[...,np.newaxis] - if c == 1 and target_channels > 1: - img = np.repeat (img, target_channels, -1) - if c > target_channels: - img = img[...,0:target_channels] - c = target_channels - +import numpy as np + +def normalize_channels(img, target_channels): + img_shape_len = len(img.shape) + if img_shape_len == 2: + h, w = img.shape + c = 0 + elif img_shape_len == 3: + h, w, c = img.shape + else: + raise ValueError("normalize: incorrect image dimensions.") + + if c == 0 and target_channels > 0: + img = img[...,np.newaxis] + if c == 1 and target_channels > 1: + img = np.repeat (img, target_channels, -1) + if c > target_channels: + img = img[...,0:target_channels] + c = target_channels + return img \ No newline at end of file diff --git a/imagelib/draw.py b/imagelib/draw.py index 3de1191..c87dc0a 100644 --- a/imagelib/draw.py +++ b/imagelib/draw.py @@ -1,13 +1,13 @@ -import numpy as np -import cv2 - -def draw_polygon (image, points, color, thickness = 1): - points_len = len(points) - for i in range (0, points_len): - p0 = tuple( points[i] ) - p1 = tuple( points[ (i+1) % points_len] ) - cv2.line (image, p0, p1, color, thickness=thickness) - -def draw_rect(image, rect, color, thickness=1): - l,t,r,b = rect - draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness) +import numpy as np +import cv2 + +def draw_polygon (image, points, color, thickness = 1): + points_len = len(points) + for i in range (0, points_len): + p0 = tuple( points[i] ) + p1 = tuple( points[ (i+1) % points_len] ) + cv2.line (image, p0, p1, color, thickness=thickness) + +def draw_rect(image, rect, color, thickness=1): + l,t,r,b = rect + draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness) diff --git a/imagelib/equalize_and_stack_square.py b/imagelib/equalize_and_stack_square.py index 31c435a..e25612c 100644 --- a/imagelib/equalize_and_stack_square.py +++ b/imagelib/equalize_and_stack_square.py @@ -1,45 +1,45 @@ -import numpy as np -import cv2 - -def equalize_and_stack_square (images, axis=1): - max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] ) - - target_wh = 99999 - for i,image in enumerate(images): - if len(image.shape) == 2: - h,w = image.shape - c = 1 - else: - h,w,c = image.shape - - if h < target_wh: - target_wh = h - - if w < target_wh: - target_wh = w - - for i,image in enumerate(images): - if len(image.shape) == 2: - h,w = image.shape - c = 1 - else: - h,w,c = image.shape - - if c < max_c: - if c == 1: - if len(image.shape) == 2: - image = np.expand_dims ( image, -1 ) - image = np.concatenate ( (image,)*max_c, -1 ) - elif c == 2: #GA - image = np.expand_dims ( image[...,0], -1 ) - image = np.concatenate ( (image,)*max_c, -1 ) - else: - image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 ) - - if h != target_wh or w != target_wh: - image = cv2.resize ( image, (target_wh, target_wh) ) - h,w,c = image.shape - - 
images[i] = image - +import numpy as np +import cv2 + +def equalize_and_stack_square (images, axis=1): + max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] ) + + target_wh = 99999 + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if h < target_wh: + target_wh = h + + if w < target_wh: + target_wh = w + + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if c < max_c: + if c == 1: + if len(image.shape) == 2: + image = np.expand_dims ( image, -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + elif c == 2: #GA + image = np.expand_dims ( image[...,0], -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + else: + image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 ) + + if h != target_wh or w != target_wh: + image = cv2.resize ( image, (target_wh, target_wh) ) + h,w,c = image.shape + + images[i] = image + return np.concatenate ( images, axis = 1 ) \ No newline at end of file diff --git a/imagelib/estimate_sharpness.py b/imagelib/estimate_sharpness.py index 01ef0b7..fbe5b91 100644 --- a/imagelib/estimate_sharpness.py +++ b/imagelib/estimate_sharpness.py @@ -1,277 +1,277 @@ -""" -Copyright (c) 2009-2010 Arizona Board of Regents. All Rights Reserved. - Contact: Lina Karam (karam@asu.edu) and Niranjan Narvekar (nnarveka@asu.edu) - Image, Video, and Usabilty (IVU) Lab, http://ivulab.asu.edu , Arizona State University - This copyright statement may not be removed from any file containing it or from modifications to these files. - This copyright notice must also be included in any file or product that is derived from the source files. - - Redistribution and use of this code in source and binary forms, with or without modification, are permitted provided that the - following conditions are met: - - Redistribution's of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - Redistribution's in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the distribution. - - The Image, Video, and Usability Laboratory (IVU Lab, http://ivulab.asu.edu) is acknowledged in any publication that - reports research results using this code, copies of this code, or modifications of this code. - The code and our papers are to be cited in the bibliography as: - -N. D. Narvekar and L. J. Karam, "CPBD Sharpness Metric Software", http://ivulab.asu.edu/Quality/CPBD - -N. D. Narvekar and L. J. Karam, "A No-Reference Image Blur Metric Based on the Cumulative -Probability of Blur Detection (CPBD)," accepted and to appear in the IEEE Transactions on Image Processing, 2011. - -N. D. Narvekar and L. J. Karam, "An Improved No-Reference Sharpness Metric Based on the Probability of Blur Detection," International Workshop on Video Processing and Quality Metrics for Consumer Electronics (VPQM), January 2010, http://www.vpqm.org (pdf) - -N. D. Narvekar and L. J. Karam, "A No Reference Perceptual Quality Metric based on Cumulative Probability of Blur Detection," First International Workshop on the Quality of Multimedia Experience (QoMEX), pp. 87-91, July 2009. 
- - DISCLAIMER: - This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the Arizona Board of Regents, Arizona State University, IVU Lab members, authors or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute -goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. -""" - -import numpy as np -import cv2 -from math import atan2, pi -from scipy.ndimage import convolve -from skimage.filters.edges import HSOBEL_WEIGHTS -from skimage.feature import canny - -def sobel(image): - # type: (numpy.ndarray) -> numpy.ndarray - """ - Find edges using the Sobel approximation to the derivatives. - - Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l196). - """ - - h1 = np.array(HSOBEL_WEIGHTS) - h1 /= np.sum(abs(h1)) # normalize h1 - - strength2 = np.square(convolve(image, h1.T)) - - # Note: https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l59 - thresh2 = 2 * np.sqrt(np.mean(strength2)) - - strength2[strength2 <= thresh2] = 0 - return _simple_thinning(strength2) - - -def _simple_thinning(strength): - # type: (numpy.ndarray) -> numpy.ndarray - """ - Perform a very simple thinning. - - Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l512). - """ - num_rows, num_cols = strength.shape - - zero_column = np.zeros((num_rows, 1)) - zero_row = np.zeros((1, num_cols)) - - x = ( - (strength > np.c_[zero_column, strength[:, :-1]]) & - (strength > np.c_[strength[:, 1:], zero_column]) - ) - - y = ( - (strength > np.r_[zero_row, strength[:-1, :]]) & - (strength > np.r_[strength[1:, :], zero_row]) - ) - - return x | y - - - - - -# threshold to characterize blocks as edge/non-edge blocks -THRESHOLD = 0.002 -# fitting parameter -BETA = 3.6 -# block size -BLOCK_HEIGHT, BLOCK_WIDTH = (64, 64) -# just noticeable widths based on the perceptual experiments -WIDTH_JNB = np.concatenate([5*np.ones(51), 3*np.ones(205)]) - - -def compute(image): - # type: (numpy.ndarray) -> float - """Compute the sharpness metric for the given data.""" - - # convert the image to double for further processing - image = image.astype(np.float64) - - # edge detection using canny and sobel canny edge detection is done to - # classify the blocks as edge or non-edge blocks and sobel edge - # detection is done for the purpose of edge width measurement. - canny_edges = canny(image) - sobel_edges = sobel(image) - - # edge width calculation - marziliano_widths = marziliano_method(sobel_edges, image) - - # sharpness metric calculation - return _calculate_sharpness_metric(image, canny_edges, marziliano_widths) - - -def marziliano_method(edges, image): - # type: (numpy.ndarray, numpy.ndarray) -> numpy.ndarray - """ - Calculate the widths of the given edges. - - :return: A matrix with the same dimensions as the given image with 0's at - non-edge locations and edge-widths at the edge locations. - """ - - # `edge_widths` consists of zero and non-zero values. 
A zero value - # indicates that there is no edge at that position and a non-zero value - # indicates that there is an edge at that position and the value itself - # gives the edge width. - edge_widths = np.zeros(image.shape) - - # find the gradient for the image - gradient_y, gradient_x = np.gradient(image) - - # dimensions of the image - img_height, img_width = image.shape - - # holds the angle information of the edges - edge_angles = np.zeros(image.shape) - - # calculate the angle of the edges - for row in range(img_height): - for col in range(img_width): - if gradient_x[row, col] != 0: - edge_angles[row, col] = atan2(gradient_y[row, col], gradient_x[row, col]) * (180 / pi) - elif gradient_x[row, col] == 0 and gradient_y[row, col] == 0: - edge_angles[row,col] = 0 - elif gradient_x[row, col] == 0 and gradient_y[row, col] == pi/2: - edge_angles[row, col] = 90 - - - if np.any(edge_angles): - - # quantize the angle - quantized_angles = 45 * np.round(edge_angles / 45) - - for row in range(1, img_height - 1): - for col in range(1, img_width - 1): - if edges[row, col] == 1: - - # gradient angle = 180 or -180 - if quantized_angles[row, col] == 180 or quantized_angles[row, col] == -180: - for margin in range(100 + 1): - inner_border = (col - 1) - margin - outer_border = (col - 2) - margin - - # outside image or intensity increasing from left to right - if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) <= 0: - break - - width_left = margin + 1 - - for margin in range(100 + 1): - inner_border = (col + 1) + margin - outer_border = (col + 2) + margin - - # outside image or intensity increasing from left to right - if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) >= 0: - break - - width_right = margin + 1 - - edge_widths[row, col] = width_left + width_right - - - # gradient angle = 0 - if quantized_angles[row, col] == 0: - for margin in range(100 + 1): - inner_border = (col - 1) - margin - outer_border = (col - 2) - margin - - # outside image or intensity decreasing from left to right - if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) >= 0: - break - - width_left = margin + 1 - - for margin in range(100 + 1): - inner_border = (col + 1) + margin - outer_border = (col + 2) + margin - - # outside image or intensity decreasing from left to right - if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) <= 0: - break - - width_right = margin + 1 - - edge_widths[row, col] = width_right + width_left - - return edge_widths - - -def _calculate_sharpness_metric(image, edges, edge_widths): - # type: (numpy.array, numpy.array, numpy.array) -> numpy.float64 - - # get the size of image - img_height, img_width = image.shape - - total_num_edges = 0 - hist_pblur = np.zeros(101) - - # maximum block indices - num_blocks_vertically = int(img_height / BLOCK_HEIGHT) - num_blocks_horizontally = int(img_width / BLOCK_WIDTH) - - # loop over the blocks - for i in range(num_blocks_vertically): - for j in range(num_blocks_horizontally): - - # get the row and col indices for the block pixel positions - rows = slice(BLOCK_HEIGHT * i, BLOCK_HEIGHT * (i + 1)) - cols = slice(BLOCK_WIDTH * j, BLOCK_WIDTH * (j + 1)) - - if is_edge_block(edges[rows, cols], THRESHOLD): - block_widths = edge_widths[rows, cols] - # rotate block to simulate column-major boolean indexing - block_widths = np.rot90(np.flipud(block_widths), 3) - block_widths = block_widths[block_widths != 0] - - block_contrast = 
get_block_contrast(image[rows, cols])
-                block_jnb = WIDTH_JNB[block_contrast]
-
-                # calculate the probability of blur detection at the edges
-                # detected in the block
-                prob_blur_detection = 1 - np.exp(-abs(block_widths/block_jnb) ** BETA)
-
-                # update the statistics using the block information
-                for probability in prob_blur_detection:
-                    bucket = int(round(probability * 100))
-                    hist_pblur[bucket] += 1
-                    total_num_edges += 1
-
-    # normalize the pdf
-    if total_num_edges > 0:
-        hist_pblur = hist_pblur / total_num_edges
-
-    # calculate the sharpness metric
-    return np.sum(hist_pblur[:64])
-
-
-def is_edge_block(block, threshold):
-    # type: (numpy.ndarray, float) -> bool
-    """Decide whether the given block is an edge block."""
-    return np.count_nonzero(block) > (block.size * threshold)
-
-
-def get_block_contrast(block):
-    # type: (numpy.ndarray) -> int
-    return int(np.max(block) - np.min(block))
-
-
-def estimate_sharpness(image):
-    height, width = image.shape[:2]
-
-    if image.ndim == 3:
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-
-    return compute(image)
+"""
+Copyright (c) 2009-2010 Arizona Board of Regents. All Rights Reserved.
+    Contact: Lina Karam (karam@asu.edu) and Niranjan Narvekar (nnarveka@asu.edu)
+    Image, Video, and Usability (IVU) Lab, http://ivulab.asu.edu , Arizona State University
+    This copyright statement may not be removed from any file containing it or from modifications to these files.
+    This copyright notice must also be included in any file or product that is derived from the source files.
+
+    Redistribution and use of this code in source and binary forms, with or without modification, are permitted provided that the
+following conditions are met:
+    - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the distribution.
+    - The Image, Video, and Usability Laboratory (IVU Lab, http://ivulab.asu.edu) is acknowledged in any publication that
+    reports research results using this code, copies of this code, or modifications of this code.
+    The code and our papers are to be cited in the bibliography as:
+
+N. D. Narvekar and L. J. Karam, "CPBD Sharpness Metric Software", http://ivulab.asu.edu/Quality/CPBD
+
+N. D. Narvekar and L. J. Karam, "A No-Reference Image Blur Metric Based on the Cumulative
+Probability of Blur Detection (CPBD)," accepted and to appear in the IEEE Transactions on Image Processing, 2011.
+
+N. D. Narvekar and L. J. Karam, "An Improved No-Reference Sharpness Metric Based on the Probability of Blur Detection," International Workshop on Video Processing and Quality Metrics for Consumer Electronics (VPQM), January 2010, http://www.vpqm.org (pdf)
+
+N. D. Narvekar and L. J. Karam, "A No Reference Perceptual Quality Metric based on Cumulative Probability of Blur Detection," First International Workshop on the Quality of Multimedia Experience (QoMEX), pp. 87-91, July 2009.
+
+    DISCLAIMER:
+    This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the Arizona Board of Regents, Arizona State University, IVU Lab members, authors or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute
+goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
+"""
+
+import numpy as np
+import cv2
+from math import atan2, pi
+from scipy.ndimage import convolve
+from skimage.filters.edges import HSOBEL_WEIGHTS
+from skimage.feature import canny
+
+def sobel(image):
+    # type: (numpy.ndarray) -> numpy.ndarray
+    """
+    Find edges using the Sobel approximation to the derivatives.
+
+    Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l196).
+    """
+
+    h1 = np.array(HSOBEL_WEIGHTS)
+    h1 /= np.sum(abs(h1))  # normalize h1
+
+    strength2 = np.square(convolve(image, h1.T))
+
+    # Note: https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l59
+    thresh2 = 2 * np.sqrt(np.mean(strength2))
+
+    strength2[strength2 <= thresh2] = 0
+    return _simple_thinning(strength2)
+
+
+def _simple_thinning(strength):
+    # type: (numpy.ndarray) -> numpy.ndarray
+    """
+    Perform a very simple thinning.
+
+    Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l512).
+    """
+    num_rows, num_cols = strength.shape
+
+    zero_column = np.zeros((num_rows, 1))
+    zero_row = np.zeros((1, num_cols))
+
+    x = (
+        (strength > np.c_[zero_column, strength[:, :-1]]) &
+        (strength > np.c_[strength[:, 1:], zero_column])
+    )
+
+    y = (
+        (strength > np.r_[zero_row, strength[:-1, :]]) &
+        (strength > np.r_[strength[1:, :], zero_row])
+    )
+
+    return x | y
+
+
+
+
+
+# threshold to characterize blocks as edge/non-edge blocks
+THRESHOLD = 0.002
+# fitting parameter
+BETA = 3.6
+# block size
+BLOCK_HEIGHT, BLOCK_WIDTH = (64, 64)
+# just noticeable widths based on the perceptual experiments
+WIDTH_JNB = np.concatenate([5*np.ones(51), 3*np.ones(205)])
+
+
+def compute(image):
+    # type: (numpy.ndarray) -> float
+    """Compute the sharpness metric for the given data."""
+
+    # convert the image to double for further processing
+    image = image.astype(np.float64)
+
+    # edge detection using canny and sobel: canny edge detection is done to
+    # classify the blocks as edge or non-edge blocks, and sobel edge
+    # detection is done for the purpose of edge width measurement.
+    canny_edges = canny(image)
+    sobel_edges = sobel(image)
+
+    # edge width calculation
+    marziliano_widths = marziliano_method(sobel_edges, image)
+
+    # sharpness metric calculation
+    return _calculate_sharpness_metric(image, canny_edges, marziliano_widths)
+
+
+def marziliano_method(edges, image):
+    # type: (numpy.ndarray, numpy.ndarray) -> numpy.ndarray
+    """
+    Calculate the widths of the given edges.
+
+    :return: A matrix with the same dimensions as the given image with 0's at
+        non-edge locations and edge-widths at the edge locations.
+    """
+
+    # `edge_widths` consists of zero and non-zero values. A zero value
+    # indicates that there is no edge at that position and a non-zero value
+    # indicates that there is an edge at that position and the value itself
+    # gives the edge width.
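+    # How the widths are measured (Marziliano-style): for each pixel flagged
+    # in `edges`, the loops below walk left and right along the row until the
+    # intensity profile stops changing monotonically (at most 100 px per
+    # side); the two walk lengths summed give the edge width. Only pixels
+    # whose quantized gradient angle is 0 or +/-180 degrees receive a width.
+    # NOTE: the angle loop keeps the reference CPBD port's behaviour as-is,
+    # including the `gradient_y[row, col] == pi/2` comparison, which tests a
+    # gradient value rather than an angle; when gradient_x is 0 and
+    # gradient_y is neither 0 nor pi/2, the angle simply stays 0.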
+ edge_widths = np.zeros(image.shape) + + # find the gradient for the image + gradient_y, gradient_x = np.gradient(image) + + # dimensions of the image + img_height, img_width = image.shape + + # holds the angle information of the edges + edge_angles = np.zeros(image.shape) + + # calculate the angle of the edges + for row in range(img_height): + for col in range(img_width): + if gradient_x[row, col] != 0: + edge_angles[row, col] = atan2(gradient_y[row, col], gradient_x[row, col]) * (180 / pi) + elif gradient_x[row, col] == 0 and gradient_y[row, col] == 0: + edge_angles[row,col] = 0 + elif gradient_x[row, col] == 0 and gradient_y[row, col] == pi/2: + edge_angles[row, col] = 90 + + + if np.any(edge_angles): + + # quantize the angle + quantized_angles = 45 * np.round(edge_angles / 45) + + for row in range(1, img_height - 1): + for col in range(1, img_width - 1): + if edges[row, col] == 1: + + # gradient angle = 180 or -180 + if quantized_angles[row, col] == 180 or quantized_angles[row, col] == -180: + for margin in range(100 + 1): + inner_border = (col - 1) - margin + outer_border = (col - 2) - margin + + # outside image or intensity increasing from left to right + if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) <= 0: + break + + width_left = margin + 1 + + for margin in range(100 + 1): + inner_border = (col + 1) + margin + outer_border = (col + 2) + margin + + # outside image or intensity increasing from left to right + if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) >= 0: + break + + width_right = margin + 1 + + edge_widths[row, col] = width_left + width_right + + + # gradient angle = 0 + if quantized_angles[row, col] == 0: + for margin in range(100 + 1): + inner_border = (col - 1) - margin + outer_border = (col - 2) - margin + + # outside image or intensity decreasing from left to right + if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) >= 0: + break + + width_left = margin + 1 + + for margin in range(100 + 1): + inner_border = (col + 1) + margin + outer_border = (col + 2) + margin + + # outside image or intensity decreasing from left to right + if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) <= 0: + break + + width_right = margin + 1 + + edge_widths[row, col] = width_right + width_left + + return edge_widths + + +def _calculate_sharpness_metric(image, edges, edge_widths): + # type: (numpy.array, numpy.array, numpy.array) -> numpy.float64 + + # get the size of image + img_height, img_width = image.shape + + total_num_edges = 0 + hist_pblur = np.zeros(101) + + # maximum block indices + num_blocks_vertically = int(img_height / BLOCK_HEIGHT) + num_blocks_horizontally = int(img_width / BLOCK_WIDTH) + + # loop over the blocks + for i in range(num_blocks_vertically): + for j in range(num_blocks_horizontally): + + # get the row and col indices for the block pixel positions + rows = slice(BLOCK_HEIGHT * i, BLOCK_HEIGHT * (i + 1)) + cols = slice(BLOCK_WIDTH * j, BLOCK_WIDTH * (j + 1)) + + if is_edge_block(edges[rows, cols], THRESHOLD): + block_widths = edge_widths[rows, cols] + # rotate block to simulate column-major boolean indexing + block_widths = np.rot90(np.flipud(block_widths), 3) + block_widths = block_widths[block_widths != 0] + + block_contrast = get_block_contrast(image[rows, cols]) + block_jnb = WIDTH_JNB[block_contrast] + + # calculate the probability of blur detection at the edges + # detected in the block + prob_blur_detection = 1 - 
np.exp(-abs(block_widths/block_jnb) ** BETA) + + # update the statistics using the block information + for probability in prob_blur_detection: + bucket = int(round(probability * 100)) + hist_pblur[bucket] += 1 + total_num_edges += 1 + + # normalize the pdf + if total_num_edges > 0: + hist_pblur = hist_pblur / total_num_edges + + # calculate the sharpness metric + return np.sum(hist_pblur[:64]) + + +def is_edge_block(block, threshold): + # type: (numpy.ndarray, float) -> bool + """Decide whether the given block is an edge block.""" + return np.count_nonzero(block) > (block.size * threshold) + + +def get_block_contrast(block): + # type: (numpy.ndarray) -> int + return int(np.max(block) - np.min(block)) + + +def estimate_sharpness(image): + height, width = image.shape[:2] + + if image.ndim == 3: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + return compute(image) diff --git a/imagelib/morph.py b/imagelib/morph.py index 8aa5114..bd3bbd8 100644 --- a/imagelib/morph.py +++ b/imagelib/morph.py @@ -1,37 +1,37 @@ -import numpy as np -import cv2 -from scipy.spatial import Delaunay - - -def applyAffineTransform(src, srcTri, dstTri, size) : - warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) ) - return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 ) - -def morphTriangle(dst_img, src_img, st, dt) : - (h,w,c) = dst_img.shape - sr = np.array( cv2.boundingRect(np.float32(st)) ) - dr = np.array( cv2.boundingRect(np.float32(dt)) ) - sRect = st - sr[0:2] - dRect = dt - dr[0:2] - d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32) - cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0); - imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]] - size = (dr[2], dr[3]) - warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size) - - if c == 1: - warpImage1 = np.expand_dims( warpImage1, -1 ) - - dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask - -def morph_by_points (image, sp, dp): - if sp.shape != dp.shape: - raise ValueError ('morph_by_points() sp.shape != dp.shape') - (h,w,c) = image.shape - - result_image = np.zeros(image.shape, dtype = image.dtype) - - for tri in Delaunay(dp).simplices: - morphTriangle(result_image, image, sp[tri], dp[tri]) - +import numpy as np +import cv2 +from scipy.spatial import Delaunay + + +def applyAffineTransform(src, srcTri, dstTri, size) : + warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) ) + return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 ) + +def morphTriangle(dst_img, src_img, st, dt) : + (h,w,c) = dst_img.shape + sr = np.array( cv2.boundingRect(np.float32(st)) ) + dr = np.array( cv2.boundingRect(np.float32(dt)) ) + sRect = st - sr[0:2] + dRect = dt - dr[0:2] + d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32) + cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0); + imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]] + size = (dr[2], dr[3]) + warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size) + + if c == 1: + warpImage1 = np.expand_dims( warpImage1, -1 ) + + dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask + +def morph_by_points (image, sp, dp): + if sp.shape != dp.shape: + raise ValueError ('morph_by_points() sp.shape != dp.shape') + (h,w,c) = image.shape + + result_image = 
np.zeros(image.shape, dtype = image.dtype) + + for tri in Delaunay(dp).simplices: + morphTriangle(result_image, image, sp[tri], dp[tri]) + return result_image \ No newline at end of file diff --git a/imagelib/reduce_colors.py b/imagelib/reduce_colors.py index 961f00d..4ff8823 100644 --- a/imagelib/reduce_colors.py +++ b/imagelib/reduce_colors.py @@ -1,14 +1,14 @@ -import numpy as np -import cv2 -from PIL import Image - -#n_colors = [0..256] -def reduce_colors (img_bgr, n_colors): - img_rgb = (img_bgr[...,::-1] * 255.0).astype(np.uint8) - img_rgb_pil = Image.fromarray(img_rgb) - img_rgb_pil_p = img_rgb_pil.convert('P', palette=Image.ADAPTIVE, colors=n_colors) - - img_rgb_p = img_rgb_pil_p.convert('RGB') - img_bgr = cv2.cvtColor( np.array(img_rgb_p, dtype=np.float32) / 255.0, cv2.COLOR_RGB2BGR ) - - return img_bgr +import numpy as np +import cv2 +from PIL import Image + +#n_colors = [0..256] +def reduce_colors (img_bgr, n_colors): + img_rgb = (img_bgr[...,::-1] * 255.0).astype(np.uint8) + img_rgb_pil = Image.fromarray(img_rgb) + img_rgb_pil_p = img_rgb_pil.convert('P', palette=Image.ADAPTIVE, colors=n_colors) + + img_rgb_p = img_rgb_pil_p.convert('RGB') + img_bgr = cv2.cvtColor( np.array(img_rgb_p, dtype=np.float32) / 255.0, cv2.COLOR_RGB2BGR ) + + return img_bgr diff --git a/imagelib/text.py b/imagelib/text.py index 2659db2..31639dc 100644 --- a/imagelib/text.py +++ b/imagelib/text.py @@ -1,64 +1,64 @@ -import localization -import numpy as np -from PIL import Image, ImageDraw, ImageFont - -pil_fonts = {} -def _get_pil_font (font, size): - global pil_fonts - try: - font_str_id = '%s_%d' % (font, size) - if font_str_id not in pil_fonts.keys(): - pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic") - pil_font = pil_fonts[font_str_id] - return pil_font - except: - return ImageFont.load_default() - -def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None): - h,w,c = shape - try: - pil_font = _get_pil_font( localization.get_default_ttf_font_name() , h-2) - - canvas = Image.new('RGB', (w,h) , (0,0,0) ) - draw = ImageDraw.Draw(canvas) - offset = ( 0, 0) - draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(np.int)) ) - - result = np.asarray(canvas) / 255 - - if c > 3: - result = np.concatenate ( (result, np.ones ((h,w,c-3)) ), axis=-1 ) - elif c < 3: - result = result[...,0:c] - return result - except: - return np.zeros ( (h,w,c) ) - -def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None): - h,w,c = image.shape - - l,t,r,b = rect - l = np.clip (l, 0, w-1) - r = np.clip (r, 0, w-1) - t = np.clip (t, 0, h-1) - b = np.clip (b, 0, h-1) - - image[t:b, l:r] += get_text_image ( (b-t,r-l,c) , text, color, border, font ) - - -def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None): - text_lines_len = len(text_lines) - if text_lines_len == 0: - return - - l,t,r,b = rect - h = b-t - h_per_line = h // text_lines_len - - for i in range(0, text_lines_len): - draw_text (image, (l, i*h_per_line, r, (i+1)*h_per_line), text_lines[i], color, border, font) - -def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None): - image = np.zeros ( image.shape, dtype=np.float ) - draw_text_lines ( image, rect, text_lines, color, border, font) - return image +import localization +import numpy as np +from PIL import Image, ImageDraw, ImageFont + +pil_fonts = {} +def _get_pil_font (font, size): + global pil_fonts + try: + font_str_id = '%s_%d' % (font, size) + if font_str_id not in 
pil_fonts.keys(): + pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic") + pil_font = pil_fonts[font_str_id] + return pil_font + except: + return ImageFont.load_default() + +def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None): + h,w,c = shape + try: + pil_font = _get_pil_font( localization.get_default_ttf_font_name() , h-2) + + canvas = Image.new('RGB', (w,h) , (0,0,0) ) + draw = ImageDraw.Draw(canvas) + offset = ( 0, 0) + draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(np.int)) ) + + result = np.asarray(canvas) / 255 + + if c > 3: + result = np.concatenate ( (result, np.ones ((h,w,c-3)) ), axis=-1 ) + elif c < 3: + result = result[...,0:c] + return result + except: + return np.zeros ( (h,w,c) ) + +def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None): + h,w,c = image.shape + + l,t,r,b = rect + l = np.clip (l, 0, w-1) + r = np.clip (r, 0, w-1) + t = np.clip (t, 0, h-1) + b = np.clip (b, 0, h-1) + + image[t:b, l:r] += get_text_image ( (b-t,r-l,c) , text, color, border, font ) + + +def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None): + text_lines_len = len(text_lines) + if text_lines_len == 0: + return + + l,t,r,b = rect + h = b-t + h_per_line = h // text_lines_len + + for i in range(0, text_lines_len): + draw_text (image, (l, i*h_per_line, r, (i+1)*h_per_line), text_lines[i], color, border, font) + +def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None): + image = np.zeros ( image.shape, dtype=np.float ) + draw_text_lines ( image, rect, text_lines, color, border, font) + return image diff --git a/imagelib/warp.py b/imagelib/warp.py index aa0f602..9d5754d 100644 --- a/imagelib/warp.py +++ b/imagelib/warp.py @@ -1,51 +1,51 @@ -import numpy as np -import cv2 -from utils import random_utils - -def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): - h,w,c = source.shape - if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024): - raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.') - - rotation = np.random.uniform( rotation_range[0], rotation_range[1] ) - scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1]) - tx = np.random.uniform( tx_range[0], tx_range[1] ) - ty = np.random.uniform( ty_range[0], ty_range[1] ) - - #random warp by grid - cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ] - cell_count = w // cell_size + 1 - - grid_points = np.linspace( 0, w, cell_count) - mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() - mapy = mapx.T - - mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) - mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) - - half_cell_size = cell_size // 2 - - mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) - mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) - - #random transform - random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale) - random_transform_mat[:, 2] += (tx*w, ty*w) - - params = dict() - params['mapx'] = mapx - params['mapy'] = mapy - params['rmat'] = random_transform_mat - params['w'] = w - 
params['flip'] = flip and np.random.randint(10) < 4 - - return params - -def warp_by_params (params, img, warp, transform, flip, is_border_replicate): - if warp: - img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC ) - if transform: - img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC ) - if flip and params['flip']: - img = img[:,::-1,...] +import numpy as np +import cv2 +from utils import random_utils + +def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): + h,w,c = source.shape + if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024): + raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.') + + rotation = np.random.uniform( rotation_range[0], rotation_range[1] ) + scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1]) + tx = np.random.uniform( tx_range[0], tx_range[1] ) + ty = np.random.uniform( ty_range[0], ty_range[1] ) + + #random warp by grid + cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ] + cell_count = w // cell_size + 1 + + grid_points = np.linspace( 0, w, cell_count) + mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() + mapy = mapx.T + + mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + + half_cell_size = cell_size // 2 + + mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) + mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) + + #random transform + random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale) + random_transform_mat[:, 2] += (tx*w, ty*w) + + params = dict() + params['mapx'] = mapx + params['mapy'] = mapy + params['rmat'] = random_transform_mat + params['w'] = w + params['flip'] = flip and np.random.randint(10) < 4 + + return params + +def warp_by_params (params, img, warp, transform, flip, is_border_replicate): + if warp: + img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC ) + if transform: + img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC ) + if flip and params['flip']: + img = img[:,::-1,...] 
return img \ No newline at end of file diff --git a/interact/__init__.py b/interact/__init__.py index db40e4f..d6f770a 100644 --- a/interact/__init__.py +++ b/interact/__init__.py @@ -1 +1 @@ -from .interact import interact +from .interact import interact diff --git a/interact/interact.py b/interact/interact.py index c9b22f1..323c157 100644 --- a/interact/interact.py +++ b/interact/interact.py @@ -1,404 +1,404 @@ -import multiprocessing -import os -import sys -import time -import types - -import cv2 -from tqdm import tqdm - -try: - import IPython #if success we are in colab - from IPython.display import display, clear_output - import PIL - import matplotlib.pyplot as plt - is_colab = True -except: - is_colab = False - -class InteractBase(object): - EVENT_LBUTTONDOWN = 1 - EVENT_LBUTTONUP = 2 - EVENT_MBUTTONDOWN = 3 - EVENT_MBUTTONUP = 4 - EVENT_RBUTTONDOWN = 5 - EVENT_RBUTTONUP = 6 - EVENT_MOUSEWHEEL = 10 - - def __init__(self): - self.named_windows = {} - self.capture_mouse_windows = {} - self.capture_keys_windows = {} - self.mouse_events = {} - self.key_events = {} - self.pg_bar = None - self.focus_wnd_name = None - - def is_support_windows(self): - return False - - def is_colab(self): - return False - - def on_destroy_all_windows(self): - raise NotImplemented - - def on_create_window (self, wnd_name): - raise NotImplemented - - def on_destroy_window (self, wnd_name): - raise NotImplemented - - def on_show_image (self, wnd_name, img): - raise NotImplemented - - def on_capture_mouse (self, wnd_name): - raise NotImplemented - - def on_capture_keys (self, wnd_name): - raise NotImplemented - - def on_process_messages(self, sleep_time=0): - raise NotImplemented - - def on_wait_any_key(self): - raise NotImplemented - - def log_info(self, msg, end='\n'): - print (msg, end=end) - - def log_err(self, msg, end='\n'): - print (msg, end=end) - - def named_window(self, wnd_name): - if wnd_name not in self.named_windows: - #we will show window only on first show_image - self.named_windows[wnd_name] = 0 - self.focus_wnd_name = wnd_name - else: print("named_window: ", wnd_name, " already created.") - - def destroy_all_windows(self): - if len( self.named_windows ) != 0: - self.on_destroy_all_windows() - self.named_windows = {} - self.capture_mouse_windows = {} - self.capture_keys_windows = {} - self.mouse_events = {} - self.key_events = {} - self.focus_wnd_name = None - - def destroy_window(self, wnd_name): - if wnd_name in self.named_windows: - self.on_destroy_window(wnd_name) - self.named_windows.pop(wnd_name) - - if wnd_name == self.focus_wnd_name: - self.focus_wnd_name = list(self.named_windows.keys())[-1] if len( self.named_windows ) != 0 else None - - if wnd_name in self.capture_mouse_windows: - self.capture_mouse_windows.pop(wnd_name) - - if wnd_name in self.capture_keys_windows: - self.capture_keys_windows.pop(wnd_name) - - if wnd_name in self.mouse_events: - self.mouse_events.pop(wnd_name) - - if wnd_name in self.key_events: - self.key_events.pop(wnd_name) - - def show_image(self, wnd_name, img): - if wnd_name in self.named_windows: - if self.named_windows[wnd_name] == 0: - self.named_windows[wnd_name] = 1 - self.on_create_window(wnd_name) - if wnd_name in self.capture_mouse_windows: - self.capture_mouse(wnd_name) - self.on_show_image(wnd_name,img) - else: print("show_image: named_window ", wnd_name, " not found.") - - def capture_mouse(self, wnd_name): - if wnd_name in self.named_windows: - self.capture_mouse_windows[wnd_name] = True - if self.named_windows[wnd_name] == 1: - 
self.on_capture_mouse(wnd_name) - else: print("capture_mouse: named_window ", wnd_name, " not found.") - - def capture_keys(self, wnd_name): - if wnd_name in self.named_windows: - if wnd_name not in self.capture_keys_windows: - self.capture_keys_windows[wnd_name] = True - self.on_capture_keys(wnd_name) - else: print("capture_keys: already set for window ", wnd_name) - else: print("capture_keys: named_window ", wnd_name, " not found.") - - def progress_bar(self, desc, total, leave=True): - if self.pg_bar is None: - self.pg_bar = tqdm( total=total, desc=desc, leave=leave, ascii=True ) - else: print("progress_bar: already set.") - - def progress_bar_inc(self, c): - if self.pg_bar is not None: - self.pg_bar.n += c - self.pg_bar.refresh() - else: print("progress_bar not set.") - - def progress_bar_close(self): - if self.pg_bar is not None: - self.pg_bar.close() - self.pg_bar = None - else: print("progress_bar not set.") - - def progress_bar_generator(self, data, desc, leave=True): - for x in tqdm( data, desc=desc, leave=leave, ascii=True ): - yield x - - def process_messages(self, sleep_time=0): - self.on_process_messages(sleep_time) - - def wait_any_key(self): - self.on_wait_any_key() - - def add_mouse_event(self, wnd_name, x, y, ev, flags): - if wnd_name not in self.mouse_events: - self.mouse_events[wnd_name] = [] - self.mouse_events[wnd_name] += [ (x, y, ev, flags) ] - - def add_key_event(self, wnd_name, ord_key, ctrl_pressed, alt_pressed, shift_pressed): - if wnd_name not in self.key_events: - self.key_events[wnd_name] = [] - self.key_events[wnd_name] += [ (ord_key, chr(ord_key), ctrl_pressed, alt_pressed, shift_pressed) ] - - def get_mouse_events(self, wnd_name): - ar = self.mouse_events.get(wnd_name, []) - self.mouse_events[wnd_name] = [] - return ar - - def get_key_events(self, wnd_name): - ar = self.key_events.get(wnd_name, []) - self.key_events[wnd_name] = [] - return ar - - def input_number(self, s, default_value, valid_list=None, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - i = float(inp) - if (valid_list is not None) and (i not in valid_list): - return default_value - return i - except: - print (default_value) - return default_value - - def input_int(self,s, default_value, valid_list=None, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - i = int(inp) - if (valid_list is not None) and (i not in valid_list): - return default_value - return i - except: - print (default_value) - return default_value - - def input_bool(self, s, default_value, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - return bool ( {"y":True,"n":False,"1":True,"0":False}.get(inp.lower(), default_value) ) - except: - print ( "y" if default_value else "n" ) - return default_value - - def input_str(self, s, default_value, valid_list=None, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - if valid_list is not None: - if inp.lower() in valid_list: - return inp.lower() - if inp in valid_list: - return inp - return default_value - - return inp - - except: - print 
(default_value) - return default_value - - def input_process(self, stdin_fd, sq, str): - sys.stdin = os.fdopen(stdin_fd) - try: - inp = input (str) - sq.put (True) - except: - sq.put (False) - - def input_in_time (self, str, max_time_sec): - sq = multiprocessing.Queue() - p = multiprocessing.Process(target=self.input_process, args=( sys.stdin.fileno(), sq, str)) - p.start() - t = time.time() - inp = False - while True: - if not sq.empty(): - inp = sq.get() - break - if time.time() - t > max_time_sec: - break - p.terminate() - sys.stdin = os.fdopen( sys.stdin.fileno() ) - return inp - - - -class InteractDesktop(InteractBase): - - def is_support_windows(self): - return True - - def on_destroy_all_windows(self): - cv2.destroyAllWindows() - - def on_create_window (self, wnd_name): - cv2.namedWindow(wnd_name) - - def on_destroy_window (self, wnd_name): - cv2.destroyWindow(wnd_name) - - def on_show_image (self, wnd_name, img): - cv2.imshow (wnd_name, img) - - def on_capture_mouse (self, wnd_name): - self.last_xy = (0,0) - - def onMouse(event, x, y, flags, param): - (inst, wnd_name) = param - if event == cv2.EVENT_LBUTTONDOWN: ev = InteractBase.EVENT_LBUTTONDOWN - elif event == cv2.EVENT_LBUTTONUP: ev = InteractBase.EVENT_LBUTTONUP - elif event == cv2.EVENT_RBUTTONDOWN: ev = InteractBase.EVENT_RBUTTONDOWN - elif event == cv2.EVENT_RBUTTONUP: ev = InteractBase.EVENT_RBUTTONUP - elif event == cv2.EVENT_MBUTTONDOWN: ev = InteractBase.EVENT_MBUTTONDOWN - elif event == cv2.EVENT_MBUTTONUP: ev = InteractBase.EVENT_MBUTTONUP - elif event == cv2.EVENT_MOUSEWHEEL: - ev = InteractBase.EVENT_MOUSEWHEEL - x,y = self.last_xy #fix opencv bug when window size more than screen size - else: ev = 0 - - self.last_xy = (x,y) - inst.add_mouse_event (wnd_name, x, y, ev, flags) - cv2.setMouseCallback(wnd_name, onMouse, (self,wnd_name) ) - - def on_capture_keys (self, wnd_name): - pass - - def on_process_messages(self, sleep_time=0): - - has_windows = False - has_capture_keys = False - - if len(self.named_windows) != 0: - has_windows = True - - if len(self.capture_keys_windows) != 0: - has_capture_keys = True - - if has_windows or has_capture_keys: - wait_key_time = max(1, int(sleep_time*1000) ) - ord_key = cv2.waitKey(wait_key_time) - shift_pressed = False - if ord_key != -1: - if chr(ord_key) >= 'A' and chr(ord_key) <= 'Z': - shift_pressed = True - ord_key += 32 - else: - if sleep_time != 0: - time.sleep(sleep_time) - - if has_capture_keys and ord_key != -1: - self.add_key_event ( self.focus_wnd_name, ord_key, False, False, shift_pressed) - - def on_wait_any_key(self): - cv2.waitKey(0) - -class InteractColab(InteractBase): - - def is_support_windows(self): - return False - - def is_colab(self): - return True - - def on_destroy_all_windows(self): - pass - #clear_output() - - def on_create_window (self, wnd_name): - pass - #clear_output() - - def on_destroy_window (self, wnd_name): - pass - - def on_show_image (self, wnd_name, img): - pass - # # cv2 stores colors as BGR; convert to RGB - # if img.ndim == 3: - # if img.shape[2] == 4: - # img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) - # else: - # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - # img = PIL.Image.fromarray(img) - # plt.imshow(img) - # plt.show() - - def on_capture_mouse (self, wnd_name): - pass - #print("on_capture_mouse(): Colab does not support") - - def on_capture_keys (self, wnd_name): - pass - #print("on_capture_keys(): Colab does not support") - - def on_process_messages(self, sleep_time=0): - time.sleep(sleep_time) - - def on_wait_any_key(self): - pass 
- #print("on_wait_any_key(): Colab does not support") - -if is_colab: - interact = InteractColab() -else: - interact = InteractDesktop() +import multiprocessing +import os +import sys +import time +import types + +import cv2 +from tqdm import tqdm + +try: + import IPython #if success we are in colab + from IPython.display import display, clear_output + import PIL + import matplotlib.pyplot as plt + is_colab = True +except: + is_colab = False + +class InteractBase(object): + EVENT_LBUTTONDOWN = 1 + EVENT_LBUTTONUP = 2 + EVENT_MBUTTONDOWN = 3 + EVENT_MBUTTONUP = 4 + EVENT_RBUTTONDOWN = 5 + EVENT_RBUTTONUP = 6 + EVENT_MOUSEWHEEL = 10 + + def __init__(self): + self.named_windows = {} + self.capture_mouse_windows = {} + self.capture_keys_windows = {} + self.mouse_events = {} + self.key_events = {} + self.pg_bar = None + self.focus_wnd_name = None + + def is_support_windows(self): + return False + + def is_colab(self): + return False + + def on_destroy_all_windows(self): + raise NotImplemented + + def on_create_window (self, wnd_name): + raise NotImplemented + + def on_destroy_window (self, wnd_name): + raise NotImplemented + + def on_show_image (self, wnd_name, img): + raise NotImplemented + + def on_capture_mouse (self, wnd_name): + raise NotImplemented + + def on_capture_keys (self, wnd_name): + raise NotImplemented + + def on_process_messages(self, sleep_time=0): + raise NotImplemented + + def on_wait_any_key(self): + raise NotImplemented + + def log_info(self, msg, end='\n'): + print (msg, end=end) + + def log_err(self, msg, end='\n'): + print (msg, end=end) + + def named_window(self, wnd_name): + if wnd_name not in self.named_windows: + #we will show window only on first show_image + self.named_windows[wnd_name] = 0 + self.focus_wnd_name = wnd_name + else: print("named_window: ", wnd_name, " already created.") + + def destroy_all_windows(self): + if len( self.named_windows ) != 0: + self.on_destroy_all_windows() + self.named_windows = {} + self.capture_mouse_windows = {} + self.capture_keys_windows = {} + self.mouse_events = {} + self.key_events = {} + self.focus_wnd_name = None + + def destroy_window(self, wnd_name): + if wnd_name in self.named_windows: + self.on_destroy_window(wnd_name) + self.named_windows.pop(wnd_name) + + if wnd_name == self.focus_wnd_name: + self.focus_wnd_name = list(self.named_windows.keys())[-1] if len( self.named_windows ) != 0 else None + + if wnd_name in self.capture_mouse_windows: + self.capture_mouse_windows.pop(wnd_name) + + if wnd_name in self.capture_keys_windows: + self.capture_keys_windows.pop(wnd_name) + + if wnd_name in self.mouse_events: + self.mouse_events.pop(wnd_name) + + if wnd_name in self.key_events: + self.key_events.pop(wnd_name) + + def show_image(self, wnd_name, img): + if wnd_name in self.named_windows: + if self.named_windows[wnd_name] == 0: + self.named_windows[wnd_name] = 1 + self.on_create_window(wnd_name) + if wnd_name in self.capture_mouse_windows: + self.capture_mouse(wnd_name) + self.on_show_image(wnd_name,img) + else: print("show_image: named_window ", wnd_name, " not found.") + + def capture_mouse(self, wnd_name): + if wnd_name in self.named_windows: + self.capture_mouse_windows[wnd_name] = True + if self.named_windows[wnd_name] == 1: + self.on_capture_mouse(wnd_name) + else: print("capture_mouse: named_window ", wnd_name, " not found.") + + def capture_keys(self, wnd_name): + if wnd_name in self.named_windows: + if wnd_name not in self.capture_keys_windows: + self.capture_keys_windows[wnd_name] = True + 
self.on_capture_keys(wnd_name) + else: print("capture_keys: already set for window ", wnd_name) + else: print("capture_keys: named_window ", wnd_name, " not found.") + + def progress_bar(self, desc, total, leave=True): + if self.pg_bar is None: + self.pg_bar = tqdm( total=total, desc=desc, leave=leave, ascii=True ) + else: print("progress_bar: already set.") + + def progress_bar_inc(self, c): + if self.pg_bar is not None: + self.pg_bar.n += c + self.pg_bar.refresh() + else: print("progress_bar not set.") + + def progress_bar_close(self): + if self.pg_bar is not None: + self.pg_bar.close() + self.pg_bar = None + else: print("progress_bar not set.") + + def progress_bar_generator(self, data, desc, leave=True): + for x in tqdm( data, desc=desc, leave=leave, ascii=True ): + yield x + + def process_messages(self, sleep_time=0): + self.on_process_messages(sleep_time) + + def wait_any_key(self): + self.on_wait_any_key() + + def add_mouse_event(self, wnd_name, x, y, ev, flags): + if wnd_name not in self.mouse_events: + self.mouse_events[wnd_name] = [] + self.mouse_events[wnd_name] += [ (x, y, ev, flags) ] + + def add_key_event(self, wnd_name, ord_key, ctrl_pressed, alt_pressed, shift_pressed): + if wnd_name not in self.key_events: + self.key_events[wnd_name] = [] + self.key_events[wnd_name] += [ (ord_key, chr(ord_key), ctrl_pressed, alt_pressed, shift_pressed) ] + + def get_mouse_events(self, wnd_name): + ar = self.mouse_events.get(wnd_name, []) + self.mouse_events[wnd_name] = [] + return ar + + def get_key_events(self, wnd_name): + ar = self.key_events.get(wnd_name, []) + self.key_events[wnd_name] = [] + return ar + + def input_number(self, s, default_value, valid_list=None, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + i = float(inp) + if (valid_list is not None) and (i not in valid_list): + return default_value + return i + except: + print (default_value) + return default_value + + def input_int(self,s, default_value, valid_list=None, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + i = int(inp) + if (valid_list is not None) and (i not in valid_list): + return default_value + return i + except: + print (default_value) + return default_value + + def input_bool(self, s, default_value, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + return bool ( {"y":True,"n":False,"1":True,"0":False}.get(inp.lower(), default_value) ) + except: + print ( "y" if default_value else "n" ) + return default_value + + def input_str(self, s, default_value, valid_list=None, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + if valid_list is not None: + if inp.lower() in valid_list: + return inp.lower() + if inp in valid_list: + return inp + return default_value + + return inp + + except: + print (default_value) + return default_value + + def input_process(self, stdin_fd, sq, str): + sys.stdin = os.fdopen(stdin_fd) + try: + inp = input (str) + sq.put (True) + except: + sq.put (False) + + def input_in_time (self, str, max_time_sec): + sq = multiprocessing.Queue() + p = 
multiprocessing.Process(target=self.input_process, args=( sys.stdin.fileno(), sq, str)) + p.start() + t = time.time() + inp = False + while True: + if not sq.empty(): + inp = sq.get() + break + if time.time() - t > max_time_sec: + break + p.terminate() + sys.stdin = os.fdopen( sys.stdin.fileno() ) + return inp + + + +class InteractDesktop(InteractBase): + + def is_support_windows(self): + return True + + def on_destroy_all_windows(self): + cv2.destroyAllWindows() + + def on_create_window (self, wnd_name): + cv2.namedWindow(wnd_name) + + def on_destroy_window (self, wnd_name): + cv2.destroyWindow(wnd_name) + + def on_show_image (self, wnd_name, img): + cv2.imshow (wnd_name, img) + + def on_capture_mouse (self, wnd_name): + self.last_xy = (0,0) + + def onMouse(event, x, y, flags, param): + (inst, wnd_name) = param + if event == cv2.EVENT_LBUTTONDOWN: ev = InteractBase.EVENT_LBUTTONDOWN + elif event == cv2.EVENT_LBUTTONUP: ev = InteractBase.EVENT_LBUTTONUP + elif event == cv2.EVENT_RBUTTONDOWN: ev = InteractBase.EVENT_RBUTTONDOWN + elif event == cv2.EVENT_RBUTTONUP: ev = InteractBase.EVENT_RBUTTONUP + elif event == cv2.EVENT_MBUTTONDOWN: ev = InteractBase.EVENT_MBUTTONDOWN + elif event == cv2.EVENT_MBUTTONUP: ev = InteractBase.EVENT_MBUTTONUP + elif event == cv2.EVENT_MOUSEWHEEL: + ev = InteractBase.EVENT_MOUSEWHEEL + x,y = self.last_xy #fix opencv bug when window size more than screen size + else: ev = 0 + + self.last_xy = (x,y) + inst.add_mouse_event (wnd_name, x, y, ev, flags) + cv2.setMouseCallback(wnd_name, onMouse, (self,wnd_name) ) + + def on_capture_keys (self, wnd_name): + pass + + def on_process_messages(self, sleep_time=0): + + has_windows = False + has_capture_keys = False + + if len(self.named_windows) != 0: + has_windows = True + + if len(self.capture_keys_windows) != 0: + has_capture_keys = True + + if has_windows or has_capture_keys: + wait_key_time = max(1, int(sleep_time*1000) ) + ord_key = cv2.waitKey(wait_key_time) + shift_pressed = False + if ord_key != -1: + if chr(ord_key) >= 'A' and chr(ord_key) <= 'Z': + shift_pressed = True + ord_key += 32 + else: + if sleep_time != 0: + time.sleep(sleep_time) + + if has_capture_keys and ord_key != -1: + self.add_key_event ( self.focus_wnd_name, ord_key, False, False, shift_pressed) + + def on_wait_any_key(self): + cv2.waitKey(0) + +class InteractColab(InteractBase): + + def is_support_windows(self): + return False + + def is_colab(self): + return True + + def on_destroy_all_windows(self): + pass + #clear_output() + + def on_create_window (self, wnd_name): + pass + #clear_output() + + def on_destroy_window (self, wnd_name): + pass + + def on_show_image (self, wnd_name, img): + pass + # # cv2 stores colors as BGR; convert to RGB + # if img.ndim == 3: + # if img.shape[2] == 4: + # img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + # else: + # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + # img = PIL.Image.fromarray(img) + # plt.imshow(img) + # plt.show() + + def on_capture_mouse (self, wnd_name): + pass + #print("on_capture_mouse(): Colab does not support") + + def on_capture_keys (self, wnd_name): + pass + #print("on_capture_keys(): Colab does not support") + + def on_process_messages(self, sleep_time=0): + time.sleep(sleep_time) + + def on_wait_any_key(self): + pass + #print("on_wait_any_key(): Colab does not support") + +if is_colab: + interact = InteractColab() +else: + interact = InteractDesktop() diff --git a/joblib/SubprocessFunctionCaller.py b/joblib/SubprocessFunctionCaller.py index 4ea3101..f7997b4 100644 --- 
a/joblib/SubprocessFunctionCaller.py +++ b/joblib/SubprocessFunctionCaller.py @@ -1,42 +1,42 @@ -import time -import multiprocessing - -class SubprocessFunctionCaller(object): - class CliFunction(object): - def __init__(self, s2c, c2s, lock): - self.s2c = s2c - self.c2s = c2s - self.lock = lock - - def __call__(self, *args, **kwargs): - self.lock.acquire() - self.c2s.put ( {'args':args, 'kwargs':kwargs} ) - while True: - if not self.s2c.empty(): - obj = self.s2c.get() - self.lock.release() - return obj - time.sleep(0.005) - - class HostProcessor(object): - def __init__(self, s2c, c2s, func): - self.s2c = s2c - self.c2s = c2s - self.func = func - - def process_messages(self): - while not self.c2s.empty(): - obj = self.c2s.get() - result = self.func ( *obj['args'], **obj['kwargs'] ) - self.s2c.put (result) - - @staticmethod - def make_pair( func ): - s2c = multiprocessing.Queue() - c2s = multiprocessing.Queue() - lock = multiprocessing.Lock() - - host_processor = SubprocessFunctionCaller.HostProcessor (s2c, c2s, func) - cli_func = SubprocessFunctionCaller.CliFunction (s2c, c2s, lock) - - return host_processor, cli_func +import time +import multiprocessing + +class SubprocessFunctionCaller(object): + class CliFunction(object): + def __init__(self, s2c, c2s, lock): + self.s2c = s2c + self.c2s = c2s + self.lock = lock + + def __call__(self, *args, **kwargs): + self.lock.acquire() + self.c2s.put ( {'args':args, 'kwargs':kwargs} ) + while True: + if not self.s2c.empty(): + obj = self.s2c.get() + self.lock.release() + return obj + time.sleep(0.005) + + class HostProcessor(object): + def __init__(self, s2c, c2s, func): + self.s2c = s2c + self.c2s = c2s + self.func = func + + def process_messages(self): + while not self.c2s.empty(): + obj = self.c2s.get() + result = self.func ( *obj['args'], **obj['kwargs'] ) + self.s2c.put (result) + + @staticmethod + def make_pair( func ): + s2c = multiprocessing.Queue() + c2s = multiprocessing.Queue() + lock = multiprocessing.Lock() + + host_processor = SubprocessFunctionCaller.HostProcessor (s2c, c2s, func) + cli_func = SubprocessFunctionCaller.CliFunction (s2c, c2s, lock) + + return host_processor, cli_func diff --git a/joblib/SubprocessorBase.py b/joblib/SubprocessorBase.py index dd48424..91540fd 100644 --- a/joblib/SubprocessorBase.py +++ b/joblib/SubprocessorBase.py @@ -1,288 +1,288 @@ -import traceback -import multiprocessing -import time -import sys -from interact import interact as io - - -class Subprocessor(object): - - class SilenceException(Exception): - pass - - class Cli(object): - def __init__ ( self, client_dict ): - self.s2c = multiprocessing.Queue() - self.c2s = multiprocessing.Queue() - self.p = multiprocessing.Process(target=self._subprocess_run, args=(client_dict,) ) - self.p.daemon = True - self.p.start() - - self.state = None - self.sent_time = None - self.sent_data = None - self.name = None - self.host_dict = None - - def kill(self): - self.p.terminate() - self.p.join() - - #overridable optional - def on_initialize(self, client_dict): - #initialize your subprocess here using client_dict - pass - - #overridable optional - def on_finalize(self): - #finalize your subprocess here - pass - - #overridable - def process_data(self, data): - #process 'data' given from host and return result - raise NotImplementedError - - #overridable optional - def get_data_name (self, data): - #return string identificator of your 'data' - return "undefined" - - def log_info(self, msg): self.c2s.put ( {'op': 'log_info', 'msg':msg } ) - def log_err(self, msg): 
self.c2s.put ( {'op': 'log_err' , 'msg':msg } ) - def progress_bar_inc(self, c): self.c2s.put ( {'op': 'progress_bar_inc' , 'c':c } ) - - def _subprocess_run(self, client_dict): - data = None - s2c, c2s = self.s2c, self.c2s - try: - self.on_initialize(client_dict) - - c2s.put ( {'op': 'init_ok'} ) - - while True: - msg = s2c.get() - op = msg.get('op','') - if op == 'data': - data = msg['data'] - result = self.process_data (data) - c2s.put ( {'op': 'success', 'data' : data, 'result' : result} ) - data = None - elif op == 'close': - break - - time.sleep(0.001) - - self.on_finalize() - c2s.put ( {'op': 'finalized'} ) - return - except Subprocessor.SilenceException as e: - pass - except Exception as e: - if data is not None: - print ('Exception while process data [%s]: %s' % (self.get_data_name(data), traceback.format_exc()) ) - else: - print ('Exception: %s' % (traceback.format_exc()) ) - - c2s.put ( {'op': 'error', 'data' : data} ) - - #overridable - def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0): - if not issubclass(SubprocessorCli_class, Subprocessor.Cli): - raise ValueError("SubprocessorCli_class must be subclass of Subprocessor.Cli") - - self.name = name - self.SubprocessorCli_class = SubprocessorCli_class - self.no_response_time_sec = no_response_time_sec - - #overridable - def process_info_generator(self): - #yield per process (name, host_dict, client_dict) - raise NotImplementedError - - #overridable optional - def on_clients_initialized(self): - #logic when all subprocesses initialized and ready - pass - - #overridable optional - def on_clients_finalized(self): - #logic when all subprocess finalized - pass - - #overridable - def get_data(self, host_dict): - #return data for processing here - raise NotImplementedError - - #overridable - def on_data_return (self, host_dict, data): - #you have to place returned 'data' back to your queue - raise NotImplementedError - - #overridable - def on_result (self, host_dict, data, result): - #your logic what to do with 'result' of 'data' - raise NotImplementedError - - #overridable - def get_result(self): - #return result that will be returned in func run() - raise NotImplementedError - - #overridable - def on_tick(self): - #tick in main loop - pass - - #overridable - def on_check_run(self): - return True - - def run(self): - if not self.on_check_run(): - return self.get_result() - - self.clis = [] - - #getting info about name of subprocesses, host and client dicts, and spawning them - for name, host_dict, client_dict in self.process_info_generator(): - try: - cli = self.SubprocessorCli_class(client_dict) - cli.state = 1 - cli.sent_time = time.time() - cli.sent_data = None - cli.name = name - cli.host_dict = host_dict - - self.clis.append (cli) - - while True: - while not cli.c2s.empty(): - obj = cli.c2s.get() - op = obj.get('op','') - if op == 'init_ok': - cli.state = 0 - elif op == 'log_info': - io.log_info(obj['msg']) - elif op == 'log_err': - io.log_err(obj['msg']) - elif op == 'error': - cli.kill() - self.clis.remove(cli) - break - if cli.state == 0: - break - io.process_messages(0.005) - except: - raise Exception ("Unable to start subprocess %s" % (name)) - - if len(self.clis) == 0: - raise Exception ("Unable to start Subprocessor '%s' " % (self.name)) - - #waiting subprocesses their success(or not) initialization - while True: - for cli in self.clis[:]: - while not cli.c2s.empty(): - obj = cli.c2s.get() - op = obj.get('op','') - if op == 'init_ok': - cli.state = 0 - elif op == 'log_info': - io.log_info(obj['msg']) 
- elif op == 'log_err': - io.log_err(obj['msg']) - elif op == 'error': - cli.kill() - self.clis.remove(cli) - break - if all ([cli.state == 0 for cli in self.clis]): - break - io.process_messages(0.005) - - if len(self.clis) == 0: - raise Exception ( "Unable to start subprocesses." ) - - #ok some processes survived, initialize host logic - - self.on_clients_initialized() - - #main loop of data processing - while True: - for cli in self.clis[:]: - while not cli.c2s.empty(): - obj = cli.c2s.get() - op = obj.get('op','') - if op == 'success': - #success processed data, return data and result to on_result - self.on_result (cli.host_dict, obj['data'], obj['result']) - self.sent_data = None - cli.state = 0 - elif op == 'error': - #some error occured while process data, returning chunk to on_data_return - if 'data' in obj.keys(): - self.on_data_return (cli.host_dict, obj['data'] ) - #and killing process - cli.kill() - self.clis.remove(cli) - elif op == 'log_info': - io.log_info(obj['msg']) - elif op == 'log_err': - io.log_err(obj['msg']) - elif op == 'progress_bar_inc': - io.progress_bar_inc(obj['c']) - - for cli in self.clis[:]: - if cli.state == 0: - #free state of subprocess, get some data from get_data - data = self.get_data(cli.host_dict) - if data is not None: - #and send it to subprocess - cli.s2c.put ( {'op': 'data', 'data' : data} ) - cli.sent_time = time.time() - cli.sent_data = data - cli.state = 1 - - elif cli.state == 1: - if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: - #subprocess busy too long - print ( '%s doesnt response, terminating it.' % (cli.name) ) - self.on_data_return (cli.host_dict, cli.sent_data ) - cli.kill() - self.clis.remove(cli) - - if all ([cli.state == 0 for cli in self.clis]): - #all subprocesses free and no more data available to process, ending loop - break - io.process_messages(0.005) - self.on_tick() - - #gracefully terminating subprocesses - for cli in self.clis[:]: - cli.s2c.put ( {'op': 'close'} ) - cli.sent_time = time.time() - - while True: - for cli in self.clis[:]: - terminate_it = False - while not cli.c2s.empty(): - obj = cli.c2s.get() - obj_op = obj['op'] - if obj_op == 'finalized': - terminate_it = True - break - - if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: - terminate_it = True - - if terminate_it: - cli.state = 2 - cli.kill() - - if all ([cli.state == 2 for cli in self.clis]): - break - - #finalizing host logic and return result - self.on_clients_finalized() - - return self.get_result() +import traceback +import multiprocessing +import time +import sys +from interact import interact as io + + +class Subprocessor(object): + + class SilenceException(Exception): + pass + + class Cli(object): + def __init__ ( self, client_dict ): + self.s2c = multiprocessing.Queue() + self.c2s = multiprocessing.Queue() + self.p = multiprocessing.Process(target=self._subprocess_run, args=(client_dict,) ) + self.p.daemon = True + self.p.start() + + self.state = None + self.sent_time = None + self.sent_data = None + self.name = None + self.host_dict = None + + def kill(self): + self.p.terminate() + self.p.join() + + #overridable optional + def on_initialize(self, client_dict): + #initialize your subprocess here using client_dict + pass + + #overridable optional + def on_finalize(self): + #finalize your subprocess here + pass + + #overridable + def process_data(self, data): + #process 'data' given from host and return result + raise NotImplementedError + + 
#overridable optional + def get_data_name (self, data): + #return string identificator of your 'data' + return "undefined" + + def log_info(self, msg): self.c2s.put ( {'op': 'log_info', 'msg':msg } ) + def log_err(self, msg): self.c2s.put ( {'op': 'log_err' , 'msg':msg } ) + def progress_bar_inc(self, c): self.c2s.put ( {'op': 'progress_bar_inc' , 'c':c } ) + + def _subprocess_run(self, client_dict): + data = None + s2c, c2s = self.s2c, self.c2s + try: + self.on_initialize(client_dict) + + c2s.put ( {'op': 'init_ok'} ) + + while True: + msg = s2c.get() + op = msg.get('op','') + if op == 'data': + data = msg['data'] + result = self.process_data (data) + c2s.put ( {'op': 'success', 'data' : data, 'result' : result} ) + data = None + elif op == 'close': + break + + time.sleep(0.001) + + self.on_finalize() + c2s.put ( {'op': 'finalized'} ) + return + except Subprocessor.SilenceException as e: + pass + except Exception as e: + if data is not None: + print ('Exception while process data [%s]: %s' % (self.get_data_name(data), traceback.format_exc()) ) + else: + print ('Exception: %s' % (traceback.format_exc()) ) + + c2s.put ( {'op': 'error', 'data' : data} ) + + #overridable + def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0): + if not issubclass(SubprocessorCli_class, Subprocessor.Cli): + raise ValueError("SubprocessorCli_class must be subclass of Subprocessor.Cli") + + self.name = name + self.SubprocessorCli_class = SubprocessorCli_class + self.no_response_time_sec = no_response_time_sec + + #overridable + def process_info_generator(self): + #yield per process (name, host_dict, client_dict) + raise NotImplementedError + + #overridable optional + def on_clients_initialized(self): + #logic when all subprocesses initialized and ready + pass + + #overridable optional + def on_clients_finalized(self): + #logic when all subprocess finalized + pass + + #overridable + def get_data(self, host_dict): + #return data for processing here + raise NotImplementedError + + #overridable + def on_data_return (self, host_dict, data): + #you have to place returned 'data' back to your queue + raise NotImplementedError + + #overridable + def on_result (self, host_dict, data, result): + #your logic what to do with 'result' of 'data' + raise NotImplementedError + + #overridable + def get_result(self): + #return result that will be returned in func run() + raise NotImplementedError + + #overridable + def on_tick(self): + #tick in main loop + pass + + #overridable + def on_check_run(self): + return True + + def run(self): + if not self.on_check_run(): + return self.get_result() + + self.clis = [] + + #getting info about name of subprocesses, host and client dicts, and spawning them + for name, host_dict, client_dict in self.process_info_generator(): + try: + cli = self.SubprocessorCli_class(client_dict) + cli.state = 1 + cli.sent_time = time.time() + cli.sent_data = None + cli.name = name + cli.host_dict = host_dict + + self.clis.append (cli) + + while True: + while not cli.c2s.empty(): + obj = cli.c2s.get() + op = obj.get('op','') + if op == 'init_ok': + cli.state = 0 + elif op == 'log_info': + io.log_info(obj['msg']) + elif op == 'log_err': + io.log_err(obj['msg']) + elif op == 'error': + cli.kill() + self.clis.remove(cli) + break + if cli.state == 0: + break + io.process_messages(0.005) + except: + raise Exception ("Unable to start subprocess %s" % (name)) + + if len(self.clis) == 0: + raise Exception ("Unable to start Subprocessor '%s' " % (self.name)) + + #waiting subprocesses their 
initialization (successfully or not)
+        while True:
+            for cli in self.clis[:]:
+                while not cli.c2s.empty():
+                    obj = cli.c2s.get()
+                    op = obj.get('op','')
+                    if op == 'init_ok':
+                        cli.state = 0
+                    elif op == 'log_info':
+                        io.log_info(obj['msg'])
+                    elif op == 'log_err':
+                        io.log_err(obj['msg'])
+                    elif op == 'error':
+                        cli.kill()
+                        self.clis.remove(cli)
+                        break
+            if all ([cli.state == 0 for cli in self.clis]):
+                break
+            io.process_messages(0.005)
+
+        if len(self.clis) == 0:
+            raise Exception ( "Unable to start subprocesses." )
+
+        #ok, some processes survived; initialize host logic
+
+        self.on_clients_initialized()
+
+        #main loop of data processing
+        while True:
+            for cli in self.clis[:]:
+                while not cli.c2s.empty():
+                    obj = cli.c2s.get()
+                    op = obj.get('op','')
+                    if op == 'success':
+                        #data was processed successfully; pass data and result to on_result
+                        self.on_result (cli.host_dict, obj['data'], obj['result'])
+                        cli.sent_data = None
+                        cli.state = 0
+                    elif op == 'error':
+                        #an error occurred while processing the data; return the chunk via on_data_return
+                        if 'data' in obj.keys():
+                            self.on_data_return (cli.host_dict, obj['data'] )
+                        #and kill the process
+                        cli.kill()
+                        self.clis.remove(cli)
+                    elif op == 'log_info':
+                        io.log_info(obj['msg'])
+                    elif op == 'log_err':
+                        io.log_err(obj['msg'])
+                    elif op == 'progress_bar_inc':
+                        io.progress_bar_inc(obj['c'])
+
+            for cli in self.clis[:]:
+                if cli.state == 0:
+                    #subprocess is free; fetch some data from get_data
+                    data = self.get_data(cli.host_dict)
+                    if data is not None:
+                        #and send it to the subprocess
+                        cli.s2c.put ( {'op': 'data', 'data' : data} )
+                        cli.sent_time = time.time()
+                        cli.sent_data = data
+                        cli.state = 1
+
+                elif cli.state == 1:
+                    if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec:
+                        #subprocess has been busy too long
+                        print ( '%s does not respond, terminating it.' 
% (cli.name) ) + self.on_data_return (cli.host_dict, cli.sent_data ) + cli.kill() + self.clis.remove(cli) + + if all ([cli.state == 0 for cli in self.clis]): + #all subprocesses free and no more data available to process, ending loop + break + io.process_messages(0.005) + self.on_tick() + + #gracefully terminating subprocesses + for cli in self.clis[:]: + cli.s2c.put ( {'op': 'close'} ) + cli.sent_time = time.time() + + while True: + for cli in self.clis[:]: + terminate_it = False + while not cli.c2s.empty(): + obj = cli.c2s.get() + obj_op = obj['op'] + if obj_op == 'finalized': + terminate_it = True + break + + if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: + terminate_it = True + + if terminate_it: + cli.state = 2 + cli.kill() + + if all ([cli.state == 2 for cli in self.clis]): + break + + #finalizing host logic and return result + self.on_clients_finalized() + + return self.get_result() diff --git a/joblib/__init__.py b/joblib/__init__.py index fbbc20c..651050a 100644 --- a/joblib/__init__.py +++ b/joblib/__init__.py @@ -1,2 +1,2 @@ -from .SubprocessorBase import Subprocessor -from .SubprocessFunctionCaller import SubprocessFunctionCaller +from .SubprocessorBase import Subprocessor +from .SubprocessFunctionCaller import SubprocessFunctionCaller diff --git a/localization/__init__.py b/localization/__init__.py index f3bcf09..4e77f1f 100644 --- a/localization/__init__.py +++ b/localization/__init__.py @@ -1,2 +1,2 @@ -from .localization import get_default_ttf_font_name - +from .localization import get_default_ttf_font_name + diff --git a/localization/localization.py b/localization/localization.py index d15be0b..01f1d1e 100644 --- a/localization/localization.py +++ b/localization/localization.py @@ -1,30 +1,30 @@ -import sys -import locale - -system_locale = locale.getdefaultlocale()[0] -# system_locale may be nil -system_language = system_locale[0:2] if system_locale is not None else "en" - -windows_font_name_map = { - 'en' : 'cour', - 'ru' : 'cour', - 'zn' : 'simsun_01' -} - -darwin_font_name_map = { - 'en' : 'cour', - 'ru' : 'cour', - 'zn' : 'Apple LiSung Light' -} - -linux_font_name_map = { - 'en' : 'cour', - 'ru' : 'cour', - 'zn' : 'cour' -} - -def get_default_ttf_font_name(): - platform = sys.platform - if platform == 'win32': return windows_font_name_map.get(system_language, 'cour') - elif platform == 'darwin': return darwin_font_name_map.get(system_language, 'cour') - else: return linux_font_name_map.get(system_language, 'cour') +import sys +import locale + +system_locale = locale.getdefaultlocale()[0] +# system_locale may be nil +system_language = system_locale[0:2] if system_locale is not None else "en" + +windows_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'simsun_01' +} + +darwin_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'Apple LiSung Light' +} + +linux_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'cour' +} + +def get_default_ttf_font_name(): + platform = sys.platform + if platform == 'win32': return windows_font_name_map.get(system_language, 'cour') + elif platform == 'darwin': return darwin_font_name_map.get(system_language, 'cour') + else: return linux_font_name_map.get(system_language, 'cour') diff --git a/main.py b/main.py index 8093c77..ebce793 100644 --- a/main.py +++ b/main.py @@ -1,276 +1,276 @@ -import os -import sys -import time -import argparse -import multiprocessing -from utils import Path_utils -from utils import os_utils -from pathlib import Path - -if 
sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6): - raise Exception("This program requires at least Python 3.6") - -class fixPathAction(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values))) - -if __name__ == "__main__": - multiprocessing.set_start_method("spawn") - - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers() - - def process_extract(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Extractor - Extractor.main( arguments.input_dir, - arguments.output_dir, - arguments.debug_dir, - arguments.detector, - arguments.manual_fix, - arguments.manual_output_debug_fix, - arguments.manual_window_size, - face_type=arguments.face_type, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } - ) - - p = subparsers.add_parser( "extract", help="Extract the faces from a pictures.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.") - p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.") - p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'") - p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") - p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.") - p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.") - p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. 
Forces to use MT extractor.") - p.set_defaults (func=process_extract) - - - def process_dev_extract_umd_csv(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Extractor - Extractor.extract_umd_csv( arguments.input_csv_file, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } - ) - - p = subparsers.add_parser( "dev_extract_umd_csv", help="") - p.add_argument('--input-csv-file', required=True, action=fixPathAction, dest="input_csv_file", help="input_csv_file") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.") - p.set_defaults (func=process_dev_extract_umd_csv) - """ - def process_extract_fanseg(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Extractor - Extractor.extract_fanseg( arguments.input_dir, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } - ) - - p = subparsers.add_parser( "extract_fanseg", help="Extract fanseg mask from faces.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.") - p.set_defaults (func=process_extract_fanseg) - """ - - def process_sort(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Sorter - Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method) - - p = subparsers.add_parser( "sort", help="Sort faces in a directory.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "final-no-blur", "test"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." ) - p.set_defaults (func=process_sort) - - def process_util(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Util - - if arguments.convert_png_to_jpg: - Util.convert_png_to_jpg_folder (input_path=arguments.input_dir) - - if arguments.add_landmarks_debug_images: - Util.add_landmarks_debug_images (input_path=arguments.input_dir) - - if arguments.recover_original_aligned_filename: - Util.recover_original_aligned_filename (input_path=arguments.input_dir) - - #if arguments.remove_fanseg: - # Util.remove_fanseg_folder (input_path=arguments.input_dir) - - p = subparsers.add_parser( "util", help="Utilities.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.") - p.add_argument('--convert-png-to-jpg', action="store_true", dest="convert_png_to_jpg", default=False, help="Convert DeepFaceLAB PNG files to JPEG.") - p.add_argument('--add-landmarks-debug-images', action="store_true", dest="add_landmarks_debug_images", default=False, help="Add landmarks debug image for aligned faces.") - p.add_argument('--recover-original-aligned-filename', action="store_true", dest="recover_original_aligned_filename", default=False, help="Recover original aligned filename.") - #p.add_argument('--remove-fanseg', action="store_true", dest="remove_fanseg", default=False, help="Remove fanseg mask from aligned faces.") - - p.set_defaults (func=process_util) - - def process_train(arguments): - os_utils.set_process_lowest_prio() - args = {'training_data_src_dir' : arguments.training_data_src_dir, - 'training_data_dst_dir' : arguments.training_data_dst_dir, - 'pretraining_data_dir' : arguments.pretraining_data_dir, - 'model_path' : arguments.model_dir, - 'model_name' : arguments.model_name, - 'no_preview' : arguments.no_preview, - 'debug' : arguments.debug, - 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ] - } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } - from mainscripts import Trainer - Trainer.main(args, device_args) - - p = subparsers.add_parser( "train", help="Trainer") - p.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of extracted SRC faceset.") - p.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of extracted DST faceset.") - p.add_argument('--pretraining-data-dir', action=fixPathAction, dest="pretraining_data_dir", default=None, help="Optional dir of extracted faceset that will be used in pretraining mode.") - p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") - p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") - p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+') - p.set_defaults (func=process_train) - - def process_convert(arguments): - os_utils.set_process_lowest_prio() - args = {'input_dir' : arguments.input_dir, - 'output_dir' : arguments.output_dir, - 'aligned_dir' : arguments.aligned_dir, - 'avaperator_aligned_dir' : arguments.avaperator_aligned_dir, - 'model_dir' : arguments.model_dir, - 'model_name' : arguments.model_name, - 'debug' : arguments.debug, - } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } - from mainscripts import Converter - Converter.main (args, device_args) - - p = subparsers.add_parser( "convert", help="Converter") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.") - p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted of dst faces stored.") - p.add_argument('--avaperator-aligned-dir', action=fixPathAction, dest="avaperator_aligned_dir", help="Only for AVATAR model. Directory of aligned avatar operator faces.") - p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.") - p.set_defaults(func=process_convert) - - videoed_parser = subparsers.add_parser( "videoed", help="Video processing.").add_subparsers() - - def process_videoed_extract_video(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.extract_video (arguments.input_file, arguments.output_dir, arguments.output_ext, arguments.fps) - p = videoed_parser.add_parser( "extract-video", help="Extract images from video file.") - p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted images will be stored.") - p.add_argument('--output-ext', dest="output_ext", default=None, help="Image format (extension) of output files.") - p.add_argument('--fps', type=int, dest="fps", default=None, help="How many frames of every second of the video will be extracted. 0 - full fps.") - p.set_defaults(func=process_videoed_extract_video) - - def process_videoed_cut_video(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.cut_video (arguments.input_file, - arguments.from_time, - arguments.to_time, - arguments.audio_track_id, - arguments.bitrate) - p = videoed_parser.add_parser( "cut-video", help="Cut video file.") - p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. 
Specify .*-extension to find first file.") - p.add_argument('--from-time', dest="from_time", default=None, help="From time, for example 00:00:00.000") - p.add_argument('--to-time', dest="to_time", default=None, help="To time, for example 00:00:00.000") - p.add_argument('--audio-track-id', type=int, dest="audio_track_id", default=None, help="Specify audio track id.") - p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.") - p.set_defaults(func=process_videoed_cut_video) - - def process_videoed_denoise_image_sequence(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.denoise_image_sequence (arguments.input_dir, arguments.ext, arguments.factor) - p = videoed_parser.add_parser( "denoise-image-sequence", help="Denoise sequence of images, keeping sharp edges. This allows you to make the final fake more believable, since the neural network is not able to make a detailed skin texture, but it makes the edges quite clear. Therefore, if the whole frame is more `blurred`, then a fake will seem more believable. Especially true for scenes of the film, which are usually very clear.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.") - p.add_argument('--factor', type=int, dest="factor", default=None, help="Denoise factor (1-20).") - p.set_defaults(func=process_videoed_denoise_image_sequence) - - def process_videoed_video_from_sequence(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.video_from_sequence (arguments.input_dir, - arguments.output_file, - arguments.reference_file, - arguments.ext, - arguments.fps, - arguments.bitrate, - arguments.lossless) - - p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--output-file', required=True, action=fixPathAction, dest="output_file", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--reference-file', action=fixPathAction, dest="reference_file", help="Reference file used to determine proper FPS and transfer audio from it. Specify .*-extension to find first file.") - p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.") - p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of output file. 
Overwritten by reference-file.")
-    p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
-    p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="PNG codec.")
-    p.set_defaults(func=process_videoed_video_from_sequence)
-
-    def process_labelingtool_edit_mask(arguments):
-        from mainscripts import MaskEditorTool
-        MaskEditorTool.mask_editor_main (arguments.input_dir, arguments.confirmed_dir, arguments.skipped_dir)
-
-    labeling_parser = subparsers.add_parser( "labelingtool", help="Labeling tool.").add_subparsers()
-    p = labeling_parser.add_parser ( "edit_mask", help="")
-    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory of aligned faces.")
-    p.add_argument('--confirmed-dir', required=True, action=fixPathAction, dest="confirmed_dir", help="This is where the labeled faces will be stored.")
-    p.add_argument('--skipped-dir', required=True, action=fixPathAction, dest="skipped_dir", help="This is where the labeled faces will be stored.")
-    p.set_defaults(func=process_labelingtool_edit_mask)
-
-    def bad_args(arguments):
-        parser.print_help()
-        exit(0)
-    parser.set_defaults(func=bad_args)
-
-    arguments = parser.parse_args()
-    arguments.func(arguments)
-
-    print ("Done.")
-
-    """
-    Suppressing error with keras 2.2.4+ on python exit:
-
-    Exception ignored in: >
-    Traceback (most recent call last):
-    File "D:\DeepFaceLab\_internal\bin\lib\site-packages\tensorflow\python\client\session.py", line 1413, in __del__
-    AttributeError: 'NoneType' object has no attribute 'raise_exception_on_not_ok_status'
-
-    reproduce: https://github.com/keras-team/keras/issues/11751 ( still no solution )
-    """
-    outnull_file = open(os.devnull, 'w')
-    os.dup2 ( outnull_file.fileno(), sys.stderr.fileno() )
-    sys.stderr = outnull_file
-
-
-'''
-import code
-code.interact(local=dict(globals(), **locals()))
-'''
+import os
+import sys
+import time
+import argparse
+import multiprocessing
+from utils import Path_utils
+from utils import os_utils
+from pathlib import Path
+
+if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6):
+    raise Exception("This program requires at least Python 3.6")
+
+class fixPathAction(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values)))
+
+if __name__ == "__main__":
+    multiprocessing.set_start_method("spawn")
+
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers()
+
+    def process_extract(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Extractor
+        Extractor.main( arguments.input_dir,
+                        arguments.output_dir,
+                        arguments.debug_dir,
+                        arguments.detector,
+                        arguments.manual_fix,
+                        arguments.manual_output_debug_fix,
+                        arguments.manual_window_size,
+                        face_type=arguments.face_type,
+                        device_args={'cpu_only'  : arguments.cpu_only,
+                                     'multi_gpu' : arguments.multi_gpu,
+                                    }
+                      )
+
+    p = subparsers.add_parser( "extract", help="Extract faces from pictures.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
+    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. 
This is where the extracted files will be stored.")
+    p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.")
+    p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option; currently all models use 'full_face'.")
+    p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter; perfect for gathering thousands of faces for the src-set. It is also good for the dst-set, but it can generate false faces in frames where the main face is not recognized! In that case, for the dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. The manual detector is suitable only for the dst-set.")
+    p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
+    p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extraction of only those frames where faces were not recognized.")
+    p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual re-extraction of input-dir frames which were deleted from the [output_dir]_debug\ dir.")
+    p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. Forces use of the MT extractor.")
+    p.set_defaults (func=process_extract)
+
+
+    def process_dev_extract_umd_csv(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Extractor
+        Extractor.extract_umd_csv( arguments.input_csv_file,
+                                   device_args={'cpu_only'  : arguments.cpu_only,
+                                                'multi_gpu' : arguments.multi_gpu,
+                                               }
+                                 )
+
+    p = subparsers.add_parser( "dev_extract_umd_csv", help="")
+    p.add_argument('--input-csv-file', required=True, action=fixPathAction, dest="input_csv_file", help="input_csv_file")
+    p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.")
+    p.set_defaults (func=process_dev_extract_umd_csv)
+    """
+    def process_extract_fanseg(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Extractor
+        Extractor.extract_fanseg( arguments.input_dir,
+                                  device_args={'cpu_only'  : arguments.cpu_only,
+                                               'multi_gpu' : arguments.multi_gpu,
+                                              }
+                                )
+
+    p = subparsers.add_parser( "extract_fanseg", help="Extract fanseg mask from faces.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.")
+    p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.")
+    p.set_defaults (func=process_extract_fanseg)
+    """
+
+    def process_sort(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Sorter
+        Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method)
+
+    p = subparsers.add_parser( "sort", help="Sort faces in a directory.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
+    p.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "final-no-blur", "test"), help="Method of sorting. 'origname' sorts by original filename to recover the original sequence." )
+    p.set_defaults (func=process_sort)
+
+    def process_util(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Util
+
+        if arguments.convert_png_to_jpg:
+            Util.convert_png_to_jpg_folder (input_path=arguments.input_dir)
+
+        if arguments.add_landmarks_debug_images:
+            Util.add_landmarks_debug_images (input_path=arguments.input_dir)
+
+        if arguments.recover_original_aligned_filename:
+            Util.recover_original_aligned_filename (input_path=arguments.input_dir)
+
+        #if arguments.remove_fanseg:
+        #    Util.remove_fanseg_folder (input_path=arguments.input_dir)
+
+    p = subparsers.add_parser( "util", help="Utilities.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.") + p.add_argument('--convert-png-to-jpg', action="store_true", dest="convert_png_to_jpg", default=False, help="Convert DeepFaceLAB PNG files to JPEG.") + p.add_argument('--add-landmarks-debug-images', action="store_true", dest="add_landmarks_debug_images", default=False, help="Add landmarks debug image for aligned faces.") + p.add_argument('--recover-original-aligned-filename', action="store_true", dest="recover_original_aligned_filename", default=False, help="Recover original aligned filename.") + #p.add_argument('--remove-fanseg', action="store_true", dest="remove_fanseg", default=False, help="Remove fanseg mask from aligned faces.") + + p.set_defaults (func=process_util) + + def process_train(arguments): + os_utils.set_process_lowest_prio() + args = {'training_data_src_dir' : arguments.training_data_src_dir, + 'training_data_dst_dir' : arguments.training_data_dst_dir, + 'pretraining_data_dir' : arguments.pretraining_data_dir, + 'model_path' : arguments.model_dir, + 'model_name' : arguments.model_name, + 'no_preview' : arguments.no_preview, + 'debug' : arguments.debug, + 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ] + } + device_args = {'cpu_only' : arguments.cpu_only, + 'force_gpu_idx' : arguments.force_gpu_idx, + } + from mainscripts import Trainer + Trainer.main(args, device_args) + + p = subparsers.add_parser( "train", help="Trainer") + p.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of extracted SRC faceset.") + p.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of extracted DST faceset.") + p.add_argument('--pretraining-data-dir', action=fixPathAction, dest="pretraining_data_dir", default=None, help="Optional dir of extracted faceset that will be used in pretraining mode.") + p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") + p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") + p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") + p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") + p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") + p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") + p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+') + p.set_defaults (func=process_train) + + def process_convert(arguments): + os_utils.set_process_lowest_prio() + args = {'input_dir' : arguments.input_dir, + 'output_dir' : arguments.output_dir, + 'aligned_dir' : arguments.aligned_dir, + 'avaperator_aligned_dir' : arguments.avaperator_aligned_dir, + 'model_dir' : arguments.model_dir, + 'model_name' : arguments.model_name, + 'debug' : arguments.debug, + } + device_args = {'cpu_only' : arguments.cpu_only, + 'force_gpu_idx' : arguments.force_gpu_idx, + } + from mainscripts import Converter + Converter.main (args, device_args) + + p = subparsers.add_parser( "convert", help="Converter") + p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.")
+    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.")
+    p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted dst faces are stored.")
+    p.add_argument('--avaperator-aligned-dir', action=fixPathAction, dest="avaperator_aligned_dir", help="Only for AVATAR model. Directory of aligned avatar operator faces.")
+    p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
+    p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model")
+    p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.")
+    p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.")
+    p.set_defaults(func=process_convert)
+
+    videoed_parser = subparsers.add_parser( "videoed", help="Video processing.").add_subparsers()
+
+    def process_videoed_extract_video(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.extract_video (arguments.input_file, arguments.output_dir, arguments.output_ext, arguments.fps)
+    p = videoed_parser.add_parser( "extract-video", help="Extract images from video file.")
+    p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. Specify .*-extension to find first file.")
+    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted images will be stored.")
+    p.add_argument('--output-ext', dest="output_ext", default=None, help="Image format (extension) of output files.")
+    p.add_argument('--fps', type=int, dest="fps", default=None, help="How many frames will be extracted from every second of the video. 0 - full fps.")
+    p.set_defaults(func=process_videoed_extract_video)
+
+    def process_videoed_cut_video(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.cut_video (arguments.input_file,
+                           arguments.from_time,
+                           arguments.to_time,
+                           arguments.audio_track_id,
+                           arguments.bitrate)
+    p = videoed_parser.add_parser( "cut-video", help="Cut video file.")
+    p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. 
Specify .*-extension to find first file.")
+    p.add_argument('--from-time', dest="from_time", default=None, help="From time, for example 00:00:00.000")
+    p.add_argument('--to-time', dest="to_time", default=None, help="To time, for example 00:00:00.000")
+    p.add_argument('--audio-track-id', type=int, dest="audio_track_id", default=None, help="Specify audio track id.")
+    p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
+    p.set_defaults(func=process_videoed_cut_video)
+
+    def process_videoed_denoise_image_sequence(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.denoise_image_sequence (arguments.input_dir, arguments.ext, arguments.factor)
+    p = videoed_parser.add_parser( "denoise-image-sequence", help="Denoise a sequence of images while keeping sharp edges. This helps make the final fake more believable: the neural network cannot produce detailed skin texture, but it does render edges quite clearly. So if the whole frame is slightly `blurred`, the fake will seem more believable. This is especially true for film scenes, which are usually very sharp.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory containing the image sequence to be processed.")
+    p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.")
+    p.add_argument('--factor', type=int, dest="factor", default=None, help="Denoise factor (1-20).")
+    p.set_defaults(func=process_videoed_denoise_image_sequence)
+
+    def process_videoed_video_from_sequence(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.video_from_sequence (arguments.input_dir,
+                                     arguments.output_file,
+                                     arguments.reference_file,
+                                     arguments.ext,
+                                     arguments.fps,
+                                     arguments.bitrate,
+                                     arguments.lossless)
+
+    p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory containing the image sequence to be processed.")
+    p.add_argument('--output-file', required=True, action=fixPathAction, dest="output_file", help="Output video file. This is where the resulting video will be stored.")
+    p.add_argument('--reference-file', action=fixPathAction, dest="reference_file", help="Reference file used to determine proper FPS and transfer audio from it. Specify .*-extension to find first file.")
+    p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.")
+    p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of the output file. 
Overridden by the reference file.")
+    p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
+    p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="Use lossless PNG codec.")
+    p.set_defaults(func=process_videoed_video_from_sequence)
+
+    def process_labelingtool_edit_mask(arguments):
+        from mainscripts import MaskEditorTool
+        MaskEditorTool.mask_editor_main (arguments.input_dir, arguments.confirmed_dir, arguments.skipped_dir)
+
+    labeling_parser = subparsers.add_parser( "labelingtool", help="Labeling tool.").add_subparsers()
+    p = labeling_parser.add_parser ( "edit_mask", help="")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory of aligned faces.")
+    p.add_argument('--confirmed-dir', required=True, action=fixPathAction, dest="confirmed_dir", help="This is where the confirmed faces will be stored.")
+    p.add_argument('--skipped-dir', required=True, action=fixPathAction, dest="skipped_dir", help="This is where the skipped faces will be stored.")
+    p.set_defaults(func=process_labelingtool_edit_mask)
+
+    def bad_args(arguments):
+        parser.print_help()
+        exit(0)
+    parser.set_defaults(func=bad_args)
+
+    arguments = parser.parse_args()
+    arguments.func(arguments)
+
+    print ("Done.")
+
+    """
+    Suppressing error with keras 2.2.4+ on python exit:
+
+    Exception ignored in: >
+    Traceback (most recent call last):
+    File "D:\DeepFaceLab\_internal\bin\lib\site-packages\tensorflow\python\client\session.py", line 1413, in __del__
+    AttributeError: 'NoneType' object has no attribute 'raise_exception_on_not_ok_status'
+
+    reproduce: https://github.com/keras-team/keras/issues/11751 ( still no solution )
+    """
+    outnull_file = open(os.devnull, 'w')
+    os.dup2 ( outnull_file.fileno(), sys.stderr.fileno() )
+    sys.stderr = outnull_file
+
+
+'''
+import code
+code.interact(local=dict(globals(), **locals()))
+'''
diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py
index 5ce9b7a..7d21748 100644
--- a/mainscripts/Converter.py
+++ b/mainscripts/Converter.py
@@ -1,391 +1,391 @@
-import sys
-import multiprocessing
-import operator
-import os
-import shutil
-import time
-import traceback
-from pathlib import Path
-
-import cv2
-import numpy as np
-
-from converters import Converter
-from interact import interact as io
-from joblib import SubprocessFunctionCaller, Subprocessor
-from utils import Path_utils
-from utils.cv2_utils import *
-from utils.DFLJPG import DFLJPG
-from utils.DFLPNG import DFLPNG
-from imagelib import normalize_channels
-
-class ConvertSubprocessor(Subprocessor):
-    class Cli(Subprocessor.Cli):
-
-        #override
-        def on_initialize(self, client_dict):
-            io.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - self.device_idx = client_dict['device_idx'] - self.device_name = client_dict['device_name'] - self.converter = client_dict['converter'] - self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None - self.alignments = client_dict['alignments'] - self.avatar_image_paths = client_dict['avatar_image_paths'] - self.debug = client_dict['debug'] - - #transfer and set stdin in order to work code.interact in debug subprocess - stdin_fd = client_dict['stdin_fd'] - if stdin_fd is not None: - sys.stdin = os.fdopen(stdin_fd) - - from nnlib import nnlib - #model process ate all GPU mem, - #so we cannot use GPU for any TF operations in converter processes - #therefore forcing active_DeviceConfig to CPU only - nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True) - - self.converter.on_cli_initialize() - - return None - - #override - def process_data(self, data): - idx, filename = data - filename_path = Path(filename) - files_processed = 1 - faces_processed = 0 - - output_filename_path = self.output_path / (filename_path.stem + '.png') - - if (self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR ) \ - and filename_path.stem not in self.alignments.keys(): - if not self.debug: - self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) ) - - if filename_path.suffix == '.png': - shutil.copy ( str(filename_path), str(output_filename_path) ) - else: - image = cv2_imread(str(filename_path)) - cv2_imwrite ( str(output_filename_path), image ) - else: - image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32) - image = normalize_channels (image, 3) - - if self.converter.type == Converter.TYPE_IMAGE: - image = self.converter.cli_convert_image(image, None, self.debug) - - if self.debug: - return (1, image) - - faces_processed = 1 - - elif self.converter.type == Converter.TYPE_IMAGE_WITH_LANDMARKS: - #currently unused - if filename_path.suffix == '.png': - dflimg = DFLPNG.load( str(filename_path) ) - elif filename_path.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filename_path) ) - else: - dflimg = None - - if dflimg is not None: - image_landmarks = dflimg.get_landmarks() - - image = self.converter.convert_image(image, image_landmarks, self.debug) - - if self.debug: - raise NotImplementedError - #for img in image: - # io.show_image ('Debug convert', img ) - # cv2.waitKey(0) - faces_processed = 1 - else: - self.log_err ("%s is not a dfl image file" % (filename_path.name) ) - - elif self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR: - - ava_face = None - if self.converter.type == Converter.TYPE_FACE_AVATAR: - ava_filename_path = self.avatar_image_paths[idx] - ava_face = (cv2_imread(str(ava_filename_path)) / 255.0).astype(np.float32) - ava_face = normalize_channels (ava_face, 3) - faces = self.alignments[filename_path.stem] - - if self.debug: - debug_images = [] - - for face_num, image_landmarks in enumerate(faces): - try: - if self.debug: - self.log_info ( '\nConverting face_num [%d] in file [%s]' % (face_num, filename_path) ) - - if self.debug: - debug_images += self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face) - else: - image = self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face) - - except Exception as e: - e_str = traceback.format_exc() - if 'MemoryError' in e_str: - raise Subprocessor.SilenceException - else: - 
raise Exception( 'Error while converting face_num [%d] in file [%s]: %s' % (face_num, filename_path, e_str) ) - - if self.debug: - return (1, debug_images) - - faces_processed = len(faces) - - if not self.debug: - cv2_imwrite (str(output_filename_path), (image*255).astype(np.uint8) ) - - - return (0, files_processed, faces_processed) - - #overridable - def get_data_name (self, data): - #return string identificator of your data - idx, filename = data - return filename - - #override - def __init__(self, converter, input_path_image_paths, output_path, alignments, avatar_image_paths=None, debug = False): - super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if debug == True else 60) - - self.converter = converter - self.input_data = self.input_path_image_paths = input_path_image_paths - self.input_data_idxs = [ *range(len(self.input_data)) ] - self.output_path = output_path - self.alignments = alignments - self.avatar_image_paths = avatar_image_paths - self.debug = debug - - self.files_processed = 0 - self.faces_processed = 0 - - #override - def process_info_generator(self): - r = [0] if self.debug else range( min(6,multiprocessing.cpu_count()) ) - - for i in r: - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'converter' : self.converter, - 'output_dir' : str(self.output_path), - 'alignments' : self.alignments, - 'avatar_image_paths' : self.avatar_image_paths, - 'debug': self.debug, - 'stdin_fd': sys.stdin.fileno() if self.debug else None - } - - #overridable optional - def on_clients_initialized(self): - if self.debug: - io.named_window ("Debug convert") - - io.progress_bar ("Converting", len (self.input_data_idxs) ) - - #overridable optional - def on_clients_finalized(self): - io.progress_bar_close() - - if self.debug: - io.destroy_all_windows() - - #override - def get_data(self, host_dict): - if len (self.input_data_idxs) > 0: - idx = self.input_data_idxs.pop(0) - return (idx, self.input_data[idx]) - return None - - #override - def on_data_return (self, host_dict, data): - idx, filename = data - self.input_data_idxs.insert(0, idx) - - #override - def on_result (self, host_dict, data, result): - if result[0] == 0: - self.files_processed += result[0] - self.faces_processed += result[1] - elif result[0] == 1: - for img in result[1]: - io.show_image ('Debug convert', (img*255).astype(np.uint8) ) - io.wait_any_key() - io.progress_bar_inc(1) - - #override - def on_tick(self): - self.converter.on_host_tick() - - #override - def get_result(self): - return self.files_processed, self.faces_processed - -def main (args, device_args): - io.log_info ("Running converter.\r\n") - - aligned_dir = args.get('aligned_dir', None) - avaperator_aligned_dir = args.get('avaperator_aligned_dir', None) - - try: - input_path = Path(args['input_dir']) - output_path = Path(args['output_dir']) - model_path = Path(args['model_dir']) - - if not input_path.exists(): - io.log_err('Input directory not found. Please ensure it exists.') - return - - if output_path.exists(): - for filename in Path_utils.get_image_paths(output_path): - Path(filename).unlink() - else: - output_path.mkdir(parents=True, exist_ok=True) - - if not model_path.exists(): - io.log_err('Model directory not found. 
Please ensure it exists.') - return - - import models - model = models.import_model( args['model_name'] )(model_path, device_args=device_args) - converter = model.get_converter() - - input_path_image_paths = Path_utils.get_image_paths(input_path) - alignments = None - avatar_image_paths = None - if converter.type == Converter.TYPE_FACE or converter.type == Converter.TYPE_FACE_AVATAR: - if aligned_dir is None: - io.log_err('Aligned directory not found. Please ensure it exists.') - return - - aligned_path = Path(aligned_dir) - if not aligned_path.exists(): - io.log_err('Aligned directory not found. Please ensure it exists.') - return - - alignments = {} - - aligned_path_image_paths = Path_utils.get_image_paths(aligned_path) - for filepath in io.progress_bar_generator(aligned_path_image_paths, "Collecting alignments"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - - source_filename_stem = Path( dflimg.get_source_filename() ).stem - if source_filename_stem not in alignments.keys(): - alignments[ source_filename_stem ] = [] - - alignments[ source_filename_stem ].append (dflimg.get_source_landmarks()) - - - if converter.type == Converter.TYPE_FACE_AVATAR: - if avaperator_aligned_dir is None: - io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') - return - - avaperator_aligned_path = Path(avaperator_aligned_dir) - if not avaperator_aligned_path.exists(): - io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') - return - - avatar_image_paths = [] - for filename in io.progress_bar_generator( Path_utils.get_image_paths(avaperator_aligned_path) , "Sorting avaperator faces"): - filepath = Path(filename) - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("Fatal error: %s is not a dfl image file" % (filepath.name) ) - return - - avatar_image_paths += [ (filename, dflimg.get_source_filename() ) ] - avatar_image_paths = [ p[0] for p in sorted(avatar_image_paths, key=operator.itemgetter(1)) ] - - if len(input_path_image_paths) < len(avatar_image_paths): - io.log_err("Input faces count must be >= avatar operator faces count.") - return - - files_processed, faces_processed = ConvertSubprocessor ( - converter = converter, - input_path_image_paths = input_path_image_paths, - output_path = output_path, - alignments = alignments, - avatar_image_paths = avatar_image_paths, - debug = args.get('debug',False) - ).run() - - model.finalize() - - except Exception as e: - print ( 'Error: %s' % (str(e))) - traceback.print_exc() - -''' -if model_name == 'AVATAR': - output_path_image_paths = Path_utils.get_image_paths(output_path) - - last_ok_frame = -1 - for filename in output_path_image_paths: - filename_path = Path(filename) - stem = Path(filename).stem - try: - frame = int(stem) - except: - raise Exception ('Aligned avatars must be created from indexed sequence files.') - - if frame-last_ok_frame > 1: - start = last_ok_frame + 1 - end = frame - 1 - - print ("Filling gaps: [%d...%d]" % (start, end) ) - for i in range (start, end+1): - shutil.copy ( str(filename), str( output_path / ('%.5d%s' % (i, filename_path.suffix )) ) ) - - last_ok_frame = frame -''' 
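The Subprocessor base class added in joblib/SubprocessorBase.py earlier in this patch is easiest to read through a toy subclass; ConvertSubprocessor below is the real consumer. The sketch that follows is illustrative only and is not part of the patch; SquareSubprocessor and its work list are invented names. It assumes the protocol exactly as defined above: process_info_generator() describes the clients to spawn, get_data() hands out chunks, process_data() runs in the child process, on_result() collects answers on the host, and on_data_return() re-queues a chunk whose client died. Under the 'spawn' start method used by main.py, the class must live at module level so it can be pickled.

    import multiprocessing
    from joblib import Subprocessor

    class SquareSubprocessor(Subprocessor):   #hypothetical example
        class Cli(Subprocessor.Cli):
            #override
            def process_data(self, data):
                return data * data            #runs in the child process

        #override
        def __init__(self, values):
            self.values = list(values)        #host-side work queue
            self.result = []
            super().__init__('Square', SquareSubprocessor.Cli, no_response_time_sec=60)

        #override
        def process_info_generator(self):
            for i in range( min(2, multiprocessing.cpu_count()) ):
                yield 'CPU%d' % (i), {}, {}   #(name, host_dict, client_dict)

        #override
        def get_data(self, host_dict):
            return self.values.pop(0) if len(self.values) > 0 else None

        #override
        def on_data_return(self, host_dict, data):
            self.values.insert(0, data)       #chunk came back from a dead client

        #override
        def on_result(self, host_dict, data, result):
            self.result.append(result)        #completion order is not guaranteed

        #override
        def get_result(self):
            return self.result

SquareSubprocessor([1, 2, 3, 4]).run() would then return the four squares, in whatever order the clients finished.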
-#interpolate landmarks -#from facelib import LandmarksProcessor -#from facelib import FaceType -#a = sorted(alignments.keys()) -#a_len = len(a) -# -#box_pts = 3 -#box = np.ones(box_pts)/box_pts -#for i in range( a_len ): -# if i >= box_pts and i <= a_len-box_pts-1: -# af0 = alignments[ a[i] ][0] ##first face -# m0 = LandmarksProcessor.get_transform_mat (af0, 256, face_type=FaceType.FULL) -# -# points = [] -# -# for j in range(-box_pts, box_pts+1): -# af = alignments[ a[i+j] ][0] ##first face -# m = LandmarksProcessor.get_transform_mat (af, 256, face_type=FaceType.FULL) -# p = LandmarksProcessor.transform_points (af, m) -# points.append (p) -# -# points = np.array(points) -# points_len = len(points) -# t_points = np.transpose(points, [1,0,2]) -# -# p1 = np.array ( [ int(np.convolve(x[:,0], box, mode='same')[points_len//2]) for x in t_points ] ) -# p2 = np.array ( [ int(np.convolve(x[:,1], box, mode='same')[points_len//2]) for x in t_points ] ) -# -# new_points = np.concatenate( [np.expand_dims(p1,-1),np.expand_dims(p2,-1)], -1 ) -# -# alignments[ a[i] ][0] = LandmarksProcessor.transform_points (new_points, m0, True).astype(np.int32) +import sys +import multiprocessing +import operator +import os +import shutil +import time +import traceback +from pathlib import Path + +import cv2 +import numpy as np + +from converters import Converter +from interact import interact as io +from joblib import SubprocessFunctionCaller, Subprocessor +from utils import Path_utils +from utils.cv2_utils import * +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG +from imagelib import normalize_channels + +class ConvertSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + io.log_info ('Running on %s.' 
% (client_dict['device_name']) )
+            self.device_idx  = client_dict['device_idx']
+            self.device_name = client_dict['device_name']
+            self.converter   = client_dict['converter']
+            self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None
+            self.alignments  = client_dict['alignments']
+            self.avatar_image_paths = client_dict['avatar_image_paths']
+            self.debug       = client_dict['debug']
+
+            #transfer and set stdin so that code.interact works in the debug subprocess
+            stdin_fd = client_dict['stdin_fd']
+            if stdin_fd is not None:
+                sys.stdin = os.fdopen(stdin_fd)
+
+            from nnlib import nnlib
+            #the model process ate all the GPU memory,
+            #so we cannot use the GPU for any TF operations in converter processes;
+            #therefore force active_DeviceConfig to CPU only
+            nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
+
+            self.converter.on_cli_initialize()
+
+            return None
+
+        #override
+        def process_data(self, data):
+            idx, filename = data
+            filename_path = Path(filename)
+            files_processed = 1
+            faces_processed = 0
+
+            output_filename_path = self.output_path / (filename_path.stem + '.png')
+
+            if (self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR ) \
+               and filename_path.stem not in self.alignments.keys():
+                if not self.debug:
+                    self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) )
+
+                if filename_path.suffix == '.png':
+                    shutil.copy ( str(filename_path), str(output_filename_path) )
+                else:
+                    image = cv2_imread(str(filename_path))
+                    cv2_imwrite ( str(output_filename_path), image )
+            else:
+                image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32)
+                image = normalize_channels (image, 3)
+
+                if self.converter.type == Converter.TYPE_IMAGE:
+                    image = self.converter.cli_convert_image(image, None, self.debug)
+
+                    if self.debug:
+                        return (1, image)
+
+                    faces_processed = 1
+
+                elif self.converter.type == Converter.TYPE_IMAGE_WITH_LANDMARKS:
+                    #currently unused
+                    if filename_path.suffix == '.png':
+                        dflimg = DFLPNG.load( str(filename_path) )
+                    elif filename_path.suffix == '.jpg':
+                        dflimg = DFLJPG.load ( str(filename_path) )
+                    else:
+                        dflimg = None
+
+                    if dflimg is not None:
+                        image_landmarks = dflimg.get_landmarks()
+
+                        image = self.converter.convert_image(image, image_landmarks, self.debug)
+
+                        if self.debug:
+                            raise NotImplementedError
+                            #for img in image:
+                            #    io.show_image ('Debug convert', img )
+                            #    cv2.waitKey(0)
+                        faces_processed = 1
+                    else:
+                        self.log_err ("%s is not a dfl image file" % (filename_path.name) )
+
+                elif self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR:
+
+                    ava_face = None
+                    if self.converter.type == Converter.TYPE_FACE_AVATAR:
+                        ava_filename_path = self.avatar_image_paths[idx]
+                        ava_face = (cv2_imread(str(ava_filename_path)) / 255.0).astype(np.float32)
+                        ava_face = normalize_channels (ava_face, 3)
+                    faces = self.alignments[filename_path.stem]
+
+                    if self.debug:
+                        debug_images = []
+
+                    for face_num, image_landmarks in enumerate(faces):
+                        try:
+                            if self.debug:
+                                self.log_info ( '\nConverting face_num [%d] in file [%s]' % (face_num, filename_path) )
+
+                            if self.debug:
+                                debug_images += self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face)
+                            else:
+                                image = self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face)
+
+                        except Exception as e:
+                            e_str = traceback.format_exc()
+                            if 'MemoryError' in e_str:
+                                raise Subprocessor.SilenceException
+                            else:
+                                
raise Exception( 'Error while converting face_num [%d] in file [%s]: %s' % (face_num, filename_path, e_str) )
+
+                    if self.debug:
+                        return (1, debug_images)
+
+                    faces_processed = len(faces)
+
+                if not self.debug:
+                    cv2_imwrite (str(output_filename_path), (image*255).astype(np.uint8) )
+
+
+            return (0, files_processed, faces_processed)
+
+        #overridable
+        def get_data_name (self, data):
+            #return a string identifier of your data
+            idx, filename = data
+            return filename
+
+    #override
+    def __init__(self, converter, input_path_image_paths, output_path, alignments, avatar_image_paths=None, debug = False):
+        super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if debug else 60)
+
+        self.converter = converter
+        self.input_data = self.input_path_image_paths = input_path_image_paths
+        self.input_data_idxs = [ *range(len(self.input_data)) ]
+        self.output_path = output_path
+        self.alignments = alignments
+        self.avatar_image_paths = avatar_image_paths
+        self.debug = debug
+
+        self.files_processed = 0
+        self.faces_processed = 0
+
+    #override
+    def process_info_generator(self):
+        r = [0] if self.debug else range( min(6,multiprocessing.cpu_count()) )
+
+        for i in r:
+            yield 'CPU%d' % (i), {}, {'device_idx': i,
+                                      'device_name': 'CPU%d' % (i),
+                                      'converter' : self.converter,
+                                      'output_dir' : str(self.output_path),
+                                      'alignments' : self.alignments,
+                                      'avatar_image_paths' : self.avatar_image_paths,
+                                      'debug': self.debug,
+                                      'stdin_fd': sys.stdin.fileno() if self.debug else None
+                                      }
+
+    #overridable optional
+    def on_clients_initialized(self):
+        if self.debug:
+            io.named_window ("Debug convert")
+
+        io.progress_bar ("Converting", len (self.input_data_idxs) )
+
+    #overridable optional
+    def on_clients_finalized(self):
+        io.progress_bar_close()
+
+        if self.debug:
+            io.destroy_all_windows()
+
+    #override
+    def get_data(self, host_dict):
+        if len (self.input_data_idxs) > 0:
+            idx = self.input_data_idxs.pop(0)
+            return (idx, self.input_data[idx])
+        return None
+
+    #override
+    def on_data_return (self, host_dict, data):
+        idx, filename = data
+        self.input_data_idxs.insert(0, idx)
+
+    #override
+    def on_result (self, host_dict, data, result):
+        if result[0] == 0:
+            #result is (0, files_processed, faces_processed)
+            self.files_processed += result[1]
+            self.faces_processed += result[2]
+        elif result[0] == 1:
+            for img in result[1]:
+                io.show_image ('Debug convert', (img*255).astype(np.uint8) )
+                io.wait_any_key()
+        io.progress_bar_inc(1)
+
+    #override
+    def on_tick(self):
+        self.converter.on_host_tick()
+
+    #override
+    def get_result(self):
+        return self.files_processed, self.faces_processed
+
+def main (args, device_args):
+    io.log_info ("Running converter.\r\n")
+
+    aligned_dir = args.get('aligned_dir', None)
+    avaperator_aligned_dir = args.get('avaperator_aligned_dir', None)
+
+    try:
+        input_path = Path(args['input_dir'])
+        output_path = Path(args['output_dir'])
+        model_path = Path(args['model_dir'])
+
+        if not input_path.exists():
+            io.log_err('Input directory not found. Please ensure it exists.')
+            return
+
+        if output_path.exists():
+            for filename in Path_utils.get_image_paths(output_path):
+                Path(filename).unlink()
+        else:
+            output_path.mkdir(parents=True, exist_ok=True)
+
+        if not model_path.exists():
+            io.log_err('Model directory not found. 
Please ensure it exists.') + return + + import models + model = models.import_model( args['model_name'] )(model_path, device_args=device_args) + converter = model.get_converter() + + input_path_image_paths = Path_utils.get_image_paths(input_path) + alignments = None + avatar_image_paths = None + if converter.type == Converter.TYPE_FACE or converter.type == Converter.TYPE_FACE_AVATAR: + if aligned_dir is None: + io.log_err('Aligned directory not found. Please ensure it exists.') + return + + aligned_path = Path(aligned_dir) + if not aligned_path.exists(): + io.log_err('Aligned directory not found. Please ensure it exists.') + return + + alignments = {} + + aligned_path_image_paths = Path_utils.get_image_paths(aligned_path) + for filepath in io.progress_bar_generator(aligned_path_image_paths, "Collecting alignments"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + + source_filename_stem = Path( dflimg.get_source_filename() ).stem + if source_filename_stem not in alignments.keys(): + alignments[ source_filename_stem ] = [] + + alignments[ source_filename_stem ].append (dflimg.get_source_landmarks()) + + + if converter.type == Converter.TYPE_FACE_AVATAR: + if avaperator_aligned_dir is None: + io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') + return + + avaperator_aligned_path = Path(avaperator_aligned_dir) + if not avaperator_aligned_path.exists(): + io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') + return + + avatar_image_paths = [] + for filename in io.progress_bar_generator( Path_utils.get_image_paths(avaperator_aligned_path) , "Sorting avaperator faces"): + filepath = Path(filename) + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("Fatal error: %s is not a dfl image file" % (filepath.name) ) + return + + avatar_image_paths += [ (filename, dflimg.get_source_filename() ) ] + avatar_image_paths = [ p[0] for p in sorted(avatar_image_paths, key=operator.itemgetter(1)) ] + + if len(input_path_image_paths) < len(avatar_image_paths): + io.log_err("Input faces count must be >= avatar operator faces count.") + return + + files_processed, faces_processed = ConvertSubprocessor ( + converter = converter, + input_path_image_paths = input_path_image_paths, + output_path = output_path, + alignments = alignments, + avatar_image_paths = avatar_image_paths, + debug = args.get('debug',False) + ).run() + + model.finalize() + + except Exception as e: + print ( 'Error: %s' % (str(e))) + traceback.print_exc() + +''' +if model_name == 'AVATAR': + output_path_image_paths = Path_utils.get_image_paths(output_path) + + last_ok_frame = -1 + for filename in output_path_image_paths: + filename_path = Path(filename) + stem = Path(filename).stem + try: + frame = int(stem) + except: + raise Exception ('Aligned avatars must be created from indexed sequence files.') + + if frame-last_ok_frame > 1: + start = last_ok_frame + 1 + end = frame - 1 + + print ("Filling gaps: [%d...%d]" % (start, end) ) + for i in range (start, end+1): + shutil.copy ( str(filename), str( output_path / ('%.5d%s' % (i, filename_path.suffix )) ) ) + + last_ok_frame = frame +''' 
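+
+#The disabled block below was an experiment in temporal landmark smoothing:
+#it maps each frame's landmarks into a common aligned-face space, box-filters
+#every coordinate over a window of neighboring frames (box_pts on each side),
+#and maps the averaged points back, damping per-frame jitter at the cost of
+#lagging fast head motion.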
+#interpolate landmarks +#from facelib import LandmarksProcessor +#from facelib import FaceType +#a = sorted(alignments.keys()) +#a_len = len(a) +# +#box_pts = 3 +#box = np.ones(box_pts)/box_pts +#for i in range( a_len ): +# if i >= box_pts and i <= a_len-box_pts-1: +# af0 = alignments[ a[i] ][0] ##first face +# m0 = LandmarksProcessor.get_transform_mat (af0, 256, face_type=FaceType.FULL) +# +# points = [] +# +# for j in range(-box_pts, box_pts+1): +# af = alignments[ a[i+j] ][0] ##first face +# m = LandmarksProcessor.get_transform_mat (af, 256, face_type=FaceType.FULL) +# p = LandmarksProcessor.transform_points (af, m) +# points.append (p) +# +# points = np.array(points) +# points_len = len(points) +# t_points = np.transpose(points, [1,0,2]) +# +# p1 = np.array ( [ int(np.convolve(x[:,0], box, mode='same')[points_len//2]) for x in t_points ] ) +# p2 = np.array ( [ int(np.convolve(x[:,1], box, mode='same')[points_len//2]) for x in t_points ] ) +# +# new_points = np.concatenate( [np.expand_dims(p1,-1),np.expand_dims(p2,-1)], -1 ) +# +# alignments[ a[i] ][0] = LandmarksProcessor.transform_points (new_points, m0, True).astype(np.int32) diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index bb9873a..56ea0c6 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -1,873 +1,873 @@ -import traceback -import os -import sys -import time -import multiprocessing -import shutil -from pathlib import Path -import numpy as np -import mathlib -import imagelib -import cv2 -from utils import Path_utils -from utils.DFLPNG import DFLPNG -from utils.DFLJPG import DFLJPG -from utils.cv2_utils import * -import facelib -from facelib import FaceType -from facelib import LandmarksProcessor -from facelib import FANSegmentator -from nnlib import nnlib -from joblib import Subprocessor -from interact import interact as io - -class ExtractSubprocessor(Subprocessor): - class Data(object): - def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, final_output_files = None): - self.filename = filename - self.rects = rects or [] - self.rects_rotation = 0 - self.landmarks_accurate = landmarks_accurate - self.landmarks = landmarks or [] - self.pitch_yaw_roll = pitch_yaw_roll - self.final_output_files = final_output_files or [] - self.faces_detected = 0 - - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.type = client_dict['type'] - self.image_size = client_dict['image_size'] - self.face_type = client_dict['face_type'] - self.device_idx = client_dict['device_idx'] - self.cpu_only = client_dict['device_type'] == 'CPU' - self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None - self.debug_dir = client_dict['debug_dir'] - - self.cached_image = (None, None) - - self.e = None - device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True) - self.device_vram = device_config.gpu_vram_gb[0] - - intro_str = 'Running on %s.' % (client_dict['device_name']) - if not self.cpu_only and self.device_vram <= 2: - intro_str += " Recommended to close all programs using this device." 
- - self.log_info (intro_str) - - if 'rects' in self.type: - if self.type == 'rects-mt': - nnlib.import_all (device_config) - self.e = facelib.MTCExtractor() - elif self.type == 'rects-dlib': - nnlib.import_dlib (device_config) - self.e = facelib.DLIBExtractor(nnlib.dlib) - elif self.type == 'rects-s3fd': - nnlib.import_all (device_config) - self.e = facelib.S3FDExtractor() - else: - raise ValueError ("Wrong type.") - - if self.e is not None: - self.e.__enter__() - - elif self.type == 'landmarks': - nnlib.import_all (device_config) - self.e = facelib.LandmarksExtractor(nnlib.keras) - self.e.__enter__() - if self.device_vram >= 2: - self.second_pass_e = facelib.S3FDExtractor() - self.second_pass_e.__enter__() - else: - self.second_pass_e = None - - elif self.type == 'fanseg': - nnlib.import_all (device_config) - self.e = facelib.FANSegmentator(256, FaceType.toString(FaceType.FULL) ) - self.e.__enter__() - - elif self.type == 'final': - pass - - #override - def on_finalize(self): - if self.e is not None: - self.e.__exit__() - - #override - def process_data(self, data): - filename_path = Path( data.filename ) - - filename_path_str = str(filename_path) - if self.cached_image[0] == filename_path_str: - image = self.cached_image[1] #cached image for manual extractor - else: - image = cv2_imread( filename_path_str ) - - if image is None: - self.log_err ( 'Failed to extract %s, reason: cv2_imread() fail.' % ( str(filename_path) ) ) - return data - - image_shape = image.shape - if len(image_shape) == 2: - h, w = image.shape - image = image[:,:,np.newaxis] - ch = 1 - else: - h, w, ch = image.shape - - if ch == 1: - image = np.repeat (image, 3, -1) - elif ch == 4: - image = image[:,:,0:3] - - wm, hm = w % 2, h % 2 - if wm + hm != 0: #fix odd image - image = image[0:h-hm,0:w-wm,:] - self.cached_image = ( filename_path_str, image ) - - src_dflimg = None - h, w, ch = image.shape - if h == w: - #extracting from already extracted jpg image? 
- if filename_path.suffix == '.png': - src_dflimg = DFLPNG.load ( str(filename_path) ) - if filename_path.suffix == '.jpg': - src_dflimg = DFLJPG.load ( str(filename_path) ) - - if 'rects' in self.type: - if min(w,h) < 128: - self.log_err ( 'Image is too small %s : [%d, %d]' % ( str(filename_path), w, h ) ) - data.rects = [] - else: - for rot in ([0, 90, 270, 180]): - data.rects_rotation = rot - if rot == 0: - rotated_image = image - elif rot == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif rot == 180: - rotated_image = image[::-1,::-1,:] - elif rot == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - rects = data.rects = self.e.extract (rotated_image, is_bgr=True) - if len(rects) != 0: - break - - return data - - elif self.type == 'landmarks': - - if data.rects_rotation == 0: - rotated_image = image - elif data.rects_rotation == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif data.rects_rotation == 180: - rotated_image = image[::-1,::-1,:] - elif data.rects_rotation == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - data.landmarks = self.e.extract (rotated_image, data.rects, self.second_pass_e if (src_dflimg is None and data.landmarks_accurate) else None, is_bgr=True) - if data.rects_rotation != 0: - for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)): - new_rect, new_lmrks = rect, lmrks - (l,t,r,b) = rect - if data.rects_rotation == 90: - new_rect = ( t, h-l, b, h-r) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 180: - if lmrks is not None: - new_rect = ( w-l, h-t, w-r, h-b) - new_lmrks = lmrks.copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 270: - new_rect = ( w-b, l, w-t, r ) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - data.rects[i], data.landmarks[i] = new_rect, new_lmrks - - return data - - elif self.type == 'final': - data.final_output_files = [] - rects = data.rects - landmarks = data.landmarks - - if self.debug_dir is not None: - debug_output_file = str( Path(self.debug_dir) / (filename_path.stem+'.jpg') ) - debug_image = image.copy() - - if src_dflimg is not None and len(rects) != 1: - #if re-extracting from dflimg and more than 1 or zero faces detected - dont process and just copy it - print("src_dflimg is not None and len(rects) != 1", str(filename_path) ) - output_file = str(self.final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - data.final_output_files.append (output_file) - else: - face_idx = 0 - for rect, image_landmarks in zip( rects, landmarks ): - if src_dflimg is not None and face_idx > 1: - #cannot extract more than 1 face from dflimg - break - - if image_landmarks is None: - continue - - rect = np.array(rect) - - if self.face_type == FaceType.MARK_ONLY: - face_image = image - face_image_landmarks = image_landmarks - else: - image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) - face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) - face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) - - landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True) - - 
rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]])) - landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] ) - - if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area - continue - - if self.debug_dir is not None: - LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) - - if src_dflimg is not None and filename_path.suffix == '.jpg': - #if extracting from dflimg and jpg copy it in order not to lose quality - output_file = str(self.final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - else: - output_file = '{}_{}{}'.format(str(self.final_output_path / filename_path.stem), str(face_idx), '.jpg') - cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) - - DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), - landmarks=face_image_landmarks.tolist(), - source_filename=filename_path.name, - source_rect=rect, - source_landmarks=image_landmarks.tolist(), - image_to_face_mat=image_to_face_mat, - pitch_yaw_roll=data.pitch_yaw_roll - ) - - data.final_output_files.append (output_file) - face_idx += 1 - data.faces_detected = face_idx - - if self.debug_dir is not None: - cv2_imwrite(debug_output_file, debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) - - return data - - elif self.type == 'fanseg': - if src_dflimg is not None: - fanseg_mask = self.e.extract( image / 255.0 ) - src_dflimg.embed_and_set( filename_path_str, - fanseg_mask=fanseg_mask, - #fanseg_mask_ver=FANSegmentator.VERSION, - ) - - #overridable - def get_data_name (self, data): - #return string identificator of your data - return data.filename - - #override - def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None): - self.input_data = input_data - self.type = type - self.image_size = image_size - self.face_type = face_type - self.debug_dir = debug_dir - self.final_output_path = final_output_path - self.manual = manual - self.manual_window_size = manual_window_size - self.result = [] - - self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) - - no_response_time_sec = 60 if not self.manual else 999999 - super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) - - #override - def on_check_run(self): - if len(self.devices) == 0: - io.log_err("No devices found to start subprocessor.") - return False - return True - - #override - def on_clients_initialized(self): - if self.manual == True: - self.wnd_name = 'Manual pass' - io.named_window(self.wnd_name) - io.capture_mouse(self.wnd_name) - io.capture_keys(self.wnd_name) - - self.cache_original_image = (None, None) - self.cache_image = (None, None) - self.cache_text_lines_img = (None, None) - self.hide_help = False - self.landmarks_accurate = True - - self.landmarks = None - self.x = 0 - self.y = 0 - self.rect_size = 100 - self.rect_locked = False - self.extract_needed = True - - io.progress_bar (None, len (self.input_data)) - - #override - def on_clients_finalized(self): - if self.manual == True: - io.destroy_all_windows() - - io.progress_bar_close() - - #override - def process_info_generator(self): - base_dict = {'type' : self.type, - 'image_size': self.image_size, - 'face_type': 
self.face_type, - 'debug_dir': self.debug_dir, - 'final_output_dir': str(self.final_output_path)} - - - for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices: - client_dict = base_dict.copy() - client_dict['device_idx'] = device_idx - client_dict['device_name'] = device_name - client_dict['device_type'] = device_type - yield client_dict['device_name'], {}, client_dict - - #override - def get_data(self, host_dict): - if not self.manual: - if len (self.input_data) > 0: - return self.input_data.pop(0) - else: - need_remark_face = False - redraw_needed = False - while len (self.input_data) > 0: - data = self.input_data[0] - filename, data_rects, data_landmarks = data.filename, data.rects, data.landmarks - is_frame_done = False - - if need_remark_face: # need remark image from input data that already has a marked face? - need_remark_face = False - if len(data_rects) != 0: # If there was already a face then lock the rectangle to it until the mouse is clicked - self.rect = data_rects.pop() - self.landmarks = data_landmarks.pop() - data_rects.clear() - data_landmarks.clear() - redraw_needed = True - self.rect_locked = True - self.rect_size = ( self.rect[2] - self.rect[0] ) / 2 - self.x = ( self.rect[0] + self.rect[2] ) / 2 - self.y = ( self.rect[1] + self.rect[3] ) / 2 - - if len(data_rects) == 0: - if self.cache_original_image[0] == filename: - self.original_image = self.cache_original_image[1] - else: - self.original_image = cv2_imread( filename ) - self.cache_original_image = (filename, self.original_image ) - - (h,w,c) = self.original_image.shape - self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / ( h * (16.0/9.0) ) - - if self.cache_image[0] == (h,w,c) + (self.view_scale,filename): - self.image = self.cache_image[1] - else: - self.image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) - self.cache_image = ( (h,w,c) + (self.view_scale,filename), self.image ) - - (h,w,c) = self.image.shape - - sh = (0,0, w, min(100, h) ) - if self.cache_text_lines_img[0] == sh: - self.text_lines_img = self.cache_text_lines_img[1] - else: - self.text_lines_img = (imagelib.get_draw_text_lines ( self.image, sh, - [ '[Mouse click] - lock/unlock selection', - '[Mouse wheel] - change rect', - '[Enter] / [Space] - confirm / skip frame', - '[,] [.]- prev frame, next frame. 
[Q] - skip remaining frames', - '[a] - accuracy on/off (more fps)', - '[h] - hide this help' - ], (1, 1, 1) )*255).astype(np.uint8) - - self.cache_text_lines_img = (sh, self.text_lines_img) - - while True: - io.process_messages(0.0001) - - new_x = self.x - new_y = self.y - new_rect_size = self.rect_size - - mouse_events = io.get_mouse_events(self.wnd_name) - for ev in mouse_events: - (x, y, ev, flags) = ev - if ev == io.EVENT_MOUSEWHEEL and not self.rect_locked: - mod = 1 if flags > 0 else -1 - diff = 1 if new_rect_size <= 40 else np.clip(new_rect_size / 10, 1, 10) - new_rect_size = max (5, new_rect_size + diff*mod) - elif ev == io.EVENT_LBUTTONDOWN: - self.rect_locked = not self.rect_locked - self.extract_needed = True - elif not self.rect_locked: - new_x = np.clip (x, 0, w-1) / self.view_scale - new_y = np.clip (y, 0, h-1) / self.view_scale - - key_events = io.get_key_events(self.wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - - if key == ord('\r') or key == ord('\n'): - #confirm frame - is_frame_done = True - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - break - elif key == ord(' '): - #confirm skip frame - is_frame_done = True - break - elif key == ord(',') and len(self.result) > 0: - #go prev frame - - if self.rect_locked: - self.rect_locked = False - # Only save the face if the rect is still locked - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - - - self.input_data.insert(0, self.result.pop() ) - io.progress_bar_inc(-1) - need_remark_face = True - - break - elif key == ord('.'): - #go next frame - - if self.rect_locked: - self.rect_locked = False - # Only save the face if the rect is still locked - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - - need_remark_face = True - is_frame_done = True - break - elif key == ord('q'): - #skip remaining - - if self.rect_locked: - self.rect_locked = False - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - - while len(self.input_data) > 0: - self.result.append( self.input_data.pop(0) ) - io.progress_bar_inc(1) - - break - - elif key == ord('h'): - self.hide_help = not self.hide_help - break - elif key == ord('a'): - self.landmarks_accurate = not self.landmarks_accurate - break - - if self.x != new_x or \ - self.y != new_y or \ - self.rect_size != new_rect_size or \ - self.extract_needed or \ - redraw_needed: - self.x = new_x - self.y = new_y - self.rect_size = new_rect_size - self.rect = ( int(self.x-self.rect_size), - int(self.y-self.rect_size), - int(self.x+self.rect_size), - int(self.y+self.rect_size) ) - - if redraw_needed: - redraw_needed = False - return ExtractSubprocessor.Data (filename, landmarks_accurate=self.landmarks_accurate) - else: - return ExtractSubprocessor.Data (filename, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) - - else: - is_frame_done = True - - if is_frame_done: - self.result.append ( data ) - self.input_data.pop(0) - io.progress_bar_inc(1) - self.extract_needed = True - self.rect_locked = False - - return None - - #override - def on_data_return (self, host_dict, data): - if not self.manual: - self.input_data.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - if self.manual == True: - filename, landmarks = result.filename, result.landmarks - if len(landmarks) != 0: - self.landmarks = landmarks[0] - - (h,w,c) = self.image.shape - - if not self.hide_help: - image = cv2.addWeighted 
(self.image,1.0,self.text_lines_img,1.0,0) - else: - image = self.image.copy() - - view_rect = (np.array(self.rect) * self.view_scale).astype(np.int).tolist() - view_landmarks = (np.array(self.landmarks) * self.view_scale).astype(np.int).tolist() - - if self.rect_size <= 40: - scaled_rect_size = h // 3 if w > h else w // 3 - - p1 = (self.x - self.rect_size, self.y - self.rect_size) - p2 = (self.x + self.rect_size, self.y - self.rect_size) - p3 = (self.x - self.rect_size, self.y + self.rect_size) - - wh = h if h < w else w - np1 = (w / 2 - wh / 4, h / 2 - wh / 4) - np2 = (w / 2 + wh / 4, h / 2 - wh / 4) - np3 = (w / 2 - wh / 4, h / 2 + wh / 4) - - mat = cv2.getAffineTransform( np.float32([p1,p2,p3])*self.view_scale, np.float32([np1,np2,np3]) ) - image = cv2.warpAffine(image, mat,(w,h) ) - view_landmarks = LandmarksProcessor.transform_points (view_landmarks, mat) - - landmarks_color = (255,255,0) if self.rect_locked else (0,255,0) - LandmarksProcessor.draw_rect_landmarks (image, view_rect, view_landmarks, self.image_size, self.face_type, landmarks_color=landmarks_color) - self.extract_needed = False - - io.show_image (self.wnd_name, image) - else: - self.result.append ( result ) - io.progress_bar_inc(1) - - - - #override - def get_result(self): - return self.result - - @staticmethod - def get_devices_for_config (manual, type, multi_gpu, cpu_only): - backend = nnlib.device.backend - if 'cpu' in backend: - cpu_only = True - - if 'rects' in type or type == 'landmarks' or type == 'fanseg': - if not cpu_only and type == 'rects-mt' and backend == "plaidML": #plaidML works with MT very slowly - cpu_only = True - - if not cpu_only: - devices = [] - if not manual and multi_gpu: - devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) - - if len(devices) == 0: - idx = nnlib.device.getBestValidDeviceIdx() - if idx != -1: - devices = [idx] - - if len(devices) == 0: - cpu_only = True - - result = [] - for idx in devices: - dev_name = nnlib.device.getDeviceName(idx) - dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) - - if not manual and (type == 'rects-dlib' or type == 'rects-mt' ): - for i in range ( int (max (1, dev_vram / 2) ) ): - result += [ (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) ] - else: - result += [ (idx, 'GPU', dev_name, dev_vram) ] - - return result - - if cpu_only: - if manual: - return [ (0, 'CPU', 'CPU', 0 ) ] - else: - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] - - elif type == 'final': - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range(min(8, multiprocessing.cpu_count())) ] - -class DeletedFilesSearcherSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.debug_paths_stems = client_dict['debug_paths_stems'] - return None - - #override - def process_data(self, data): - input_path_stem = Path(data[0]).stem - return any ( [ input_path_stem == d_stem for d_stem in self.debug_paths_stems] ) - - #override - def get_data_name (self, data): - #return string identificator of your data - return data[0] - - #override - def __init__(self, input_paths, debug_paths ): - self.input_paths = input_paths - self.debug_paths_stems = [ Path(d).stem for d in debug_paths] - self.result = [] - super().__init__('DeletedFilesSearcherSubprocessor', DeletedFilesSearcherSubprocessor.Cli, 60) - - #override - def process_info_generator(self): - for i in range(min(multiprocessing.cpu_count(), 8)): - yield 'CPU%d' % (i), {}, {'debug_paths_stems' : self.debug_paths_stems} - - 
#override - def on_clients_initialized(self): - io.progress_bar ("Searching deleted files", len (self.input_paths)) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def get_data(self, host_dict): - if len (self.input_paths) > 0: - return [self.input_paths.pop(0)] - return None - - #override - def on_data_return (self, host_dict, data): - self.input_paths.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - if result == False: - self.result.append( data[0] ) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result - - -#currently unused -def extract_fanseg(input_dir, device_args={} ): - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - - input_path = Path(input_dir) - if not input_path.exists(): - raise ValueError('Input directory not found. Please ensure it exists.') - - paths_to_extract = [] - for filename in Path_utils.get_image_paths(input_path) : - filepath = Path(filename) - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is not None: - paths_to_extract.append (filepath) - - paths_to_extract_len = len(paths_to_extract) - if paths_to_extract_len > 0: - io.log_info ("Performing extract fanseg for %d files..." % (paths_to_extract_len) ) - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in paths_to_extract ], 'fanseg', multi_gpu=multi_gpu, cpu_only=cpu_only).run() - -def extract_umd_csv(input_file_csv, - image_size=256, - face_type='full_face', - device_args={} ): - - #extract faces from umdfaces.io dataset csv file with pitch,yaw,roll info. - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - face_type = FaceType.fromString(face_type) - - input_file_csv_path = Path(input_file_csv) - if not input_file_csv_path.exists(): - raise ValueError('input_file_csv not found. Please ensure it exists.') - - input_file_csv_root_path = input_file_csv_path.parent - output_path = input_file_csv_path.parent / ('aligned_' + input_file_csv_path.name) - - io.log_info("Output dir is %s." % (str(output_path)) ) - - if output_path.exists(): - output_images_paths = Path_utils.get_image_paths(output_path) - if len(output_images_paths) > 0: - io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) - for filename in output_images_paths: - Path(filename).unlink() - else: - output_path.mkdir(parents=True, exist_ok=True) - - try: - with open( str(input_file_csv_path), 'r') as f: - csv_file = f.read() - except Exception as e: - io.log_err("Unable to open or read file " + str(input_file_csv_path) + ": " + str(e) ) - return - - strings = csv_file.split('\n') - keys = strings[0].split(',') - keys_len = len(keys) - csv_data = [] - for i in range(1, len(strings)): - values = strings[i].split(',') - if keys_len != len(values): - io.log_err("Wrong string in csv file, skipping.") - continue - - csv_data += [ { keys[n] : values[n] for n in range(keys_len) } ] - - data = [] - for d in csv_data: - filename = input_file_csv_root_path / d['FILE'] - - pitch, yaw, roll = float(d['PITCH']), float(d['YAW']), float(d['ROLL']) - if pitch < -90 or pitch > 90 or yaw < -90 or yaw > 90 or roll < -90 or roll > 90: - continue - - pitch_yaw_roll = pitch/90.0, yaw/90.0, roll/90.0 - - x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT']) - - data += [ ExtractSubprocessor.Data(filename=filename, rects=[ [x,y,x+w,y+h] ], pitch_yaw_roll=pitch_yaw_roll) ] - - images_found = len(data) - faces_detected = 0 - if len(data) > 0: - io.log_info ("Performing 2nd pass from csv file...") - data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run() - - io.log_info ('Performing 3rd pass...') - data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() - faces_detected += sum([d.faces_detected for d in data]) - - - io.log_info ('-------------------------') - io.log_info ('Images found: %d' % (images_found) ) - io.log_info ('Faces detected: %d' % (faces_detected) ) - io.log_info ('-------------------------') - -def main(input_dir, - output_dir, - debug_dir=None, - detector='mt', - manual_fix=False, - manual_output_debug_fix=False, - manual_window_size=1368, - image_size=256, - face_type='full_face', - device_args={}): - - input_path = Path(input_dir) - output_path = Path(output_dir) - face_type = FaceType.fromString(face_type) - - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - - if not input_path.exists(): - raise ValueError('Input directory not found. Please ensure it exists.') - - if output_path.exists(): - if not manual_output_debug_fix and input_path != output_path: - output_images_paths = Path_utils.get_image_paths(output_path) - if len(output_images_paths) > 0: - io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) - for filename in output_images_paths: - Path(filename).unlink() - else: - output_path.mkdir(parents=True, exist_ok=True) - - if manual_output_debug_fix: - if debug_dir is None: - raise ValueError('debug-dir must be specified') - detector = 'manual' - io.log_info('Performing re-extract frames which were deleted from _debug directory.') - - input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) - if debug_dir is not None: - debug_output_path = Path(debug_dir) - - if manual_output_debug_fix: - if not debug_output_path.exists(): - raise ValueError("%s not found " % ( str(debug_output_path) )) - - input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, Path_utils.get_image_paths(debug_output_path) ).run() - input_path_image_paths = sorted (input_path_image_paths) - io.log_info('Found %d images.' % (len(input_path_image_paths))) - else: - if debug_output_path.exists(): - for filename in Path_utils.get_image_paths(debug_output_path): - Path(filename).unlink() - else: - debug_output_path.mkdir(parents=True, exist_ok=True) - - images_found = len(input_path_image_paths) - faces_detected = 0 - if images_found != 0: - if detector == 'manual': - io.log_info ('Performing manual extract...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run() - else: - io.log_info ('Performing 1st pass...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() - - io.log_info ('Performing 2nd pass...') - data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() - - io.log_info ('Performing 3rd pass...') - data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() - faces_detected += sum([d.faces_detected for d in data]) - - if manual_fix: - if all ( np.array ( [ d.faces_detected > 0 for d in data] ) == True ): - io.log_info ('All faces are detected, manual fix not needed.') - else: - fix_data = [ ExtractSubprocessor.Data(d.filename) for d in data if d.faces_detected == 0 ] - io.log_info ('Performing manual fix for %d images...' 
% (len(fix_data)) ) - fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run() - fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() - faces_detected += sum([d.faces_detected for d in fix_data]) - - - io.log_info ('-------------------------') - io.log_info ('Images found: %d' % (images_found) ) - io.log_info ('Faces detected: %d' % (faces_detected) ) - io.log_info ('-------------------------') +import traceback +import os +import sys +import time +import multiprocessing +import shutil +from pathlib import Path +import numpy as np +import mathlib +import imagelib +import cv2 +from utils import Path_utils +from utils.DFLPNG import DFLPNG +from utils.DFLJPG import DFLJPG +from utils.cv2_utils import * +import facelib +from facelib import FaceType +from facelib import LandmarksProcessor +from facelib import FANSegmentator +from nnlib import nnlib +from joblib import Subprocessor +from interact import interact as io + +class ExtractSubprocessor(Subprocessor): + class Data(object): + def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, final_output_files = None): + self.filename = filename + self.rects = rects or [] + self.rects_rotation = 0 + self.landmarks_accurate = landmarks_accurate + self.landmarks = landmarks or [] + self.pitch_yaw_roll = pitch_yaw_roll + self.final_output_files = final_output_files or [] + self.faces_detected = 0 + + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + self.type = client_dict['type'] + self.image_size = client_dict['image_size'] + self.face_type = client_dict['face_type'] + self.device_idx = client_dict['device_idx'] + self.cpu_only = client_dict['device_type'] == 'CPU' + self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None + self.debug_dir = client_dict['debug_dir'] + + self.cached_image = (None, None) + + self.e = None + device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True) + self.device_vram = device_config.gpu_vram_gb[0] + + intro_str = 'Running on %s.' % (client_dict['device_name']) + if not self.cpu_only and self.device_vram <= 2: + intro_str += " Recommended to close all programs using this device." 
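+                #(with <= 2 GB of VRAM, other programs using the same GPU can
+                #easily push this subprocess out of memory, hence the warning)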
+ + self.log_info (intro_str) + + if 'rects' in self.type: + if self.type == 'rects-mt': + nnlib.import_all (device_config) + self.e = facelib.MTCExtractor() + elif self.type == 'rects-dlib': + nnlib.import_dlib (device_config) + self.e = facelib.DLIBExtractor(nnlib.dlib) + elif self.type == 'rects-s3fd': + nnlib.import_all (device_config) + self.e = facelib.S3FDExtractor() + else: + raise ValueError ("Wrong type.") + + if self.e is not None: + self.e.__enter__() + + elif self.type == 'landmarks': + nnlib.import_all (device_config) + self.e = facelib.LandmarksExtractor(nnlib.keras) + self.e.__enter__() + if self.device_vram >= 2: + self.second_pass_e = facelib.S3FDExtractor() + self.second_pass_e.__enter__() + else: + self.second_pass_e = None + + elif self.type == 'fanseg': + nnlib.import_all (device_config) + self.e = facelib.FANSegmentator(256, FaceType.toString(FaceType.FULL) ) + self.e.__enter__() + + elif self.type == 'final': + pass + + #override + def on_finalize(self): + if self.e is not None: + self.e.__exit__() + + #override + def process_data(self, data): + filename_path = Path( data.filename ) + + filename_path_str = str(filename_path) + if self.cached_image[0] == filename_path_str: + image = self.cached_image[1] #cached image for manual extractor + else: + image = cv2_imread( filename_path_str ) + + if image is None: + self.log_err ( 'Failed to extract %s, reason: cv2_imread() fail.' % ( str(filename_path) ) ) + return data + + image_shape = image.shape + if len(image_shape) == 2: + h, w = image.shape + image = image[:,:,np.newaxis] + ch = 1 + else: + h, w, ch = image.shape + + if ch == 1: + image = np.repeat (image, 3, -1) + elif ch == 4: + image = image[:,:,0:3] + + wm, hm = w % 2, h % 2 + if wm + hm != 0: #fix odd image + image = image[0:h-hm,0:w-wm,:] + self.cached_image = ( filename_path_str, image ) + + src_dflimg = None + h, w, ch = image.shape + if h == w: + #extracting from already extracted jpg image? 
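+                #square inputs may themselves be previously extracted DFL faces;
+                #loading the embedded metadata lets such files be handled as a re-extract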
+ if filename_path.suffix == '.png': + src_dflimg = DFLPNG.load ( str(filename_path) ) + if filename_path.suffix == '.jpg': + src_dflimg = DFLJPG.load ( str(filename_path) ) + + if 'rects' in self.type: + if min(w,h) < 128: + self.log_err ( 'Image is too small %s : [%d, %d]' % ( str(filename_path), w, h ) ) + data.rects = [] + else: + for rot in ([0, 90, 270, 180]): + data.rects_rotation = rot + if rot == 0: + rotated_image = image + elif rot == 90: + rotated_image = image.swapaxes( 0,1 )[:,::-1,:] + elif rot == 180: + rotated_image = image[::-1,::-1,:] + elif rot == 270: + rotated_image = image.swapaxes( 0,1 )[::-1,:,:] + + rects = data.rects = self.e.extract (rotated_image, is_bgr=True) + if len(rects) != 0: + break + + return data + + elif self.type == 'landmarks': + + if data.rects_rotation == 0: + rotated_image = image + elif data.rects_rotation == 90: + rotated_image = image.swapaxes( 0,1 )[:,::-1,:] + elif data.rects_rotation == 180: + rotated_image = image[::-1,::-1,:] + elif data.rects_rotation == 270: + rotated_image = image.swapaxes( 0,1 )[::-1,:,:] + + data.landmarks = self.e.extract (rotated_image, data.rects, self.second_pass_e if (src_dflimg is None and data.landmarks_accurate) else None, is_bgr=True) + if data.rects_rotation != 0: + for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)): + new_rect, new_lmrks = rect, lmrks + (l,t,r,b) = rect + if data.rects_rotation == 90: + new_rect = ( t, h-l, b, h-r) + if lmrks is not None: + new_lmrks = lmrks[:,::-1].copy() + new_lmrks[:,1] = h - new_lmrks[:,1] + elif data.rects_rotation == 180: + if lmrks is not None: + new_rect = ( w-l, h-t, w-r, h-b) + new_lmrks = lmrks.copy() + new_lmrks[:,0] = w - new_lmrks[:,0] + new_lmrks[:,1] = h - new_lmrks[:,1] + elif data.rects_rotation == 270: + new_rect = ( w-b, l, w-t, r ) + if lmrks is not None: + new_lmrks = lmrks[:,::-1].copy() + new_lmrks[:,0] = w - new_lmrks[:,0] + data.rects[i], data.landmarks[i] = new_rect, new_lmrks + + return data + + elif self.type == 'final': + data.final_output_files = [] + rects = data.rects + landmarks = data.landmarks + + if self.debug_dir is not None: + debug_output_file = str( Path(self.debug_dir) / (filename_path.stem+'.jpg') ) + debug_image = image.copy() + + if src_dflimg is not None and len(rects) != 1: + #if re-extracting from dflimg and more than 1 or zero faces detected - dont process and just copy it + print("src_dflimg is not None and len(rects) != 1", str(filename_path) ) + output_file = str(self.final_output_path / filename_path.name) + if str(filename_path) != str(output_file): + shutil.copy ( str(filename_path), str(output_file) ) + data.final_output_files.append (output_file) + else: + face_idx = 0 + for rect, image_landmarks in zip( rects, landmarks ): + if src_dflimg is not None and face_idx > 1: + #cannot extract more than 1 face from dflimg + break + + if image_landmarks is None: + continue + + rect = np.array(rect) + + if self.face_type == FaceType.MARK_ONLY: + face_image = image + face_image_landmarks = image_landmarks + else: + image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) + face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) + face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) + + landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True) + + 
rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]])) + landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] ) + + if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area + continue + + if self.debug_dir is not None: + LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) + + if src_dflimg is not None and filename_path.suffix == '.jpg': + #if extracting from dflimg and jpg copy it in order not to lose quality + output_file = str(self.final_output_path / filename_path.name) + if str(filename_path) != str(output_file): + shutil.copy ( str(filename_path), str(output_file) ) + else: + output_file = '{}_{}{}'.format(str(self.final_output_path / filename_path.stem), str(face_idx), '.jpg') + cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) + + DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), + landmarks=face_image_landmarks.tolist(), + source_filename=filename_path.name, + source_rect=rect, + source_landmarks=image_landmarks.tolist(), + image_to_face_mat=image_to_face_mat, + pitch_yaw_roll=data.pitch_yaw_roll + ) + + data.final_output_files.append (output_file) + face_idx += 1 + data.faces_detected = face_idx + + if self.debug_dir is not None: + cv2_imwrite(debug_output_file, debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) + + return data + + elif self.type == 'fanseg': + if src_dflimg is not None: + fanseg_mask = self.e.extract( image / 255.0 ) + src_dflimg.embed_and_set( filename_path_str, + fanseg_mask=fanseg_mask, + #fanseg_mask_ver=FANSegmentator.VERSION, + ) + + #overridable + def get_data_name (self, data): + #return string identificator of your data + return data.filename + + #override + def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None): + self.input_data = input_data + self.type = type + self.image_size = image_size + self.face_type = face_type + self.debug_dir = debug_dir + self.final_output_path = final_output_path + self.manual = manual + self.manual_window_size = manual_window_size + self.result = [] + + self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) + + no_response_time_sec = 60 if not self.manual else 999999 + super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) + + #override + def on_check_run(self): + if len(self.devices) == 0: + io.log_err("No devices found to start subprocessor.") + return False + return True + + #override + def on_clients_initialized(self): + if self.manual == True: + self.wnd_name = 'Manual pass' + io.named_window(self.wnd_name) + io.capture_mouse(self.wnd_name) + io.capture_keys(self.wnd_name) + + self.cache_original_image = (None, None) + self.cache_image = (None, None) + self.cache_text_lines_img = (None, None) + self.hide_help = False + self.landmarks_accurate = True + + self.landmarks = None + self.x = 0 + self.y = 0 + self.rect_size = 100 + self.rect_locked = False + self.extract_needed = True + + io.progress_bar (None, len (self.input_data)) + + #override + def on_clients_finalized(self): + if self.manual == True: + io.destroy_all_windows() + + io.progress_bar_close() + + #override + def process_info_generator(self): + base_dict = {'type' : self.type, + 'image_size': self.image_size, + 'face_type': 
self.face_type, + 'debug_dir': self.debug_dir, + 'final_output_dir': str(self.final_output_path)} + + + for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices: + client_dict = base_dict.copy() + client_dict['device_idx'] = device_idx + client_dict['device_name'] = device_name + client_dict['device_type'] = device_type + yield client_dict['device_name'], {}, client_dict + + #override + def get_data(self, host_dict): + if not self.manual: + if len (self.input_data) > 0: + return self.input_data.pop(0) + else: + need_remark_face = False + redraw_needed = False + while len (self.input_data) > 0: + data = self.input_data[0] + filename, data_rects, data_landmarks = data.filename, data.rects, data.landmarks + is_frame_done = False + + if need_remark_face: # need remark image from input data that already has a marked face? + need_remark_face = False + if len(data_rects) != 0: # If there was already a face then lock the rectangle to it until the mouse is clicked + self.rect = data_rects.pop() + self.landmarks = data_landmarks.pop() + data_rects.clear() + data_landmarks.clear() + redraw_needed = True + self.rect_locked = True + self.rect_size = ( self.rect[2] - self.rect[0] ) / 2 + self.x = ( self.rect[0] + self.rect[2] ) / 2 + self.y = ( self.rect[1] + self.rect[3] ) / 2 + + if len(data_rects) == 0: + if self.cache_original_image[0] == filename: + self.original_image = self.cache_original_image[1] + else: + self.original_image = cv2_imread( filename ) + self.cache_original_image = (filename, self.original_image ) + + (h,w,c) = self.original_image.shape + self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / ( h * (16.0/9.0) ) + + if self.cache_image[0] == (h,w,c) + (self.view_scale,filename): + self.image = self.cache_image[1] + else: + self.image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) + self.cache_image = ( (h,w,c) + (self.view_scale,filename), self.image ) + + (h,w,c) = self.image.shape + + sh = (0,0, w, min(100, h) ) + if self.cache_text_lines_img[0] == sh: + self.text_lines_img = self.cache_text_lines_img[1] + else: + self.text_lines_img = (imagelib.get_draw_text_lines ( self.image, sh, + [ '[Mouse click] - lock/unlock selection', + '[Mouse wheel] - change rect', + '[Enter] / [Space] - confirm / skip frame', + '[,] [.]- prev frame, next frame. 
[Q] - skip remaining frames', + '[a] - accuracy on/off (more fps)', + '[h] - hide this help' + ], (1, 1, 1) )*255).astype(np.uint8) + + self.cache_text_lines_img = (sh, self.text_lines_img) + + while True: + io.process_messages(0.0001) + + new_x = self.x + new_y = self.y + new_rect_size = self.rect_size + + mouse_events = io.get_mouse_events(self.wnd_name) + for ev in mouse_events: + (x, y, ev, flags) = ev + if ev == io.EVENT_MOUSEWHEEL and not self.rect_locked: + mod = 1 if flags > 0 else -1 + diff = 1 if new_rect_size <= 40 else np.clip(new_rect_size / 10, 1, 10) + new_rect_size = max (5, new_rect_size + diff*mod) + elif ev == io.EVENT_LBUTTONDOWN: + self.rect_locked = not self.rect_locked + self.extract_needed = True + elif not self.rect_locked: + new_x = np.clip (x, 0, w-1) / self.view_scale + new_y = np.clip (y, 0, h-1) / self.view_scale + + key_events = io.get_key_events(self.wnd_name) + key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) + + if key == ord('\r') or key == ord('\n'): + #confirm frame + is_frame_done = True + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + break + elif key == ord(' '): + #confirm skip frame + is_frame_done = True + break + elif key == ord(',') and len(self.result) > 0: + #go prev frame + + if self.rect_locked: + self.rect_locked = False + # Only save the face if the rect is still locked + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + + + self.input_data.insert(0, self.result.pop() ) + io.progress_bar_inc(-1) + need_remark_face = True + + break + elif key == ord('.'): + #go next frame + + if self.rect_locked: + self.rect_locked = False + # Only save the face if the rect is still locked + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + + need_remark_face = True + is_frame_done = True + break + elif key == ord('q'): + #skip remaining + + if self.rect_locked: + self.rect_locked = False + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + + while len(self.input_data) > 0: + self.result.append( self.input_data.pop(0) ) + io.progress_bar_inc(1) + + break + + elif key == ord('h'): + self.hide_help = not self.hide_help + break + elif key == ord('a'): + self.landmarks_accurate = not self.landmarks_accurate + break + + if self.x != new_x or \ + self.y != new_y or \ + self.rect_size != new_rect_size or \ + self.extract_needed or \ + redraw_needed: + self.x = new_x + self.y = new_y + self.rect_size = new_rect_size + self.rect = ( int(self.x-self.rect_size), + int(self.y-self.rect_size), + int(self.x+self.rect_size), + int(self.y+self.rect_size) ) + + if redraw_needed: + redraw_needed = False + return ExtractSubprocessor.Data (filename, landmarks_accurate=self.landmarks_accurate) + else: + return ExtractSubprocessor.Data (filename, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) + + else: + is_frame_done = True + + if is_frame_done: + self.result.append ( data ) + self.input_data.pop(0) + io.progress_bar_inc(1) + self.extract_needed = True + self.rect_locked = False + + return None + + #override + def on_data_return (self, host_dict, data): + if not self.manual: + self.input_data.insert(0, data) + + #override + def on_result (self, host_dict, data, result): + if self.manual == True: + filename, landmarks = result.filename, result.landmarks + if len(landmarks) != 0: + self.landmarks = landmarks[0] + + (h,w,c) = self.image.shape + + if not self.hide_help: + image = cv2.addWeighted 
(self.image,1.0,self.text_lines_img,1.0,0) + else: + image = self.image.copy() + + view_rect = (np.array(self.rect) * self.view_scale).astype(np.int).tolist() + view_landmarks = (np.array(self.landmarks) * self.view_scale).astype(np.int).tolist() + + if self.rect_size <= 40: + scaled_rect_size = h // 3 if w > h else w // 3 + + p1 = (self.x - self.rect_size, self.y - self.rect_size) + p2 = (self.x + self.rect_size, self.y - self.rect_size) + p3 = (self.x - self.rect_size, self.y + self.rect_size) + + wh = h if h < w else w + np1 = (w / 2 - wh / 4, h / 2 - wh / 4) + np2 = (w / 2 + wh / 4, h / 2 - wh / 4) + np3 = (w / 2 - wh / 4, h / 2 + wh / 4) + + mat = cv2.getAffineTransform( np.float32([p1,p2,p3])*self.view_scale, np.float32([np1,np2,np3]) ) + image = cv2.warpAffine(image, mat,(w,h) ) + view_landmarks = LandmarksProcessor.transform_points (view_landmarks, mat) + + landmarks_color = (255,255,0) if self.rect_locked else (0,255,0) + LandmarksProcessor.draw_rect_landmarks (image, view_rect, view_landmarks, self.image_size, self.face_type, landmarks_color=landmarks_color) + self.extract_needed = False + + io.show_image (self.wnd_name, image) + else: + self.result.append ( result ) + io.progress_bar_inc(1) + + + + #override + def get_result(self): + return self.result + + @staticmethod + def get_devices_for_config (manual, type, multi_gpu, cpu_only): + backend = nnlib.device.backend + if 'cpu' in backend: + cpu_only = True + + if 'rects' in type or type == 'landmarks' or type == 'fanseg': + if not cpu_only and type == 'rects-mt' and backend == "plaidML": #plaidML works with MT very slowly + cpu_only = True + + if not cpu_only: + devices = [] + if not manual and multi_gpu: + devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) + + if len(devices) == 0: + idx = nnlib.device.getBestValidDeviceIdx() + if idx != -1: + devices = [idx] + + if len(devices) == 0: + cpu_only = True + + result = [] + for idx in devices: + dev_name = nnlib.device.getDeviceName(idx) + dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) + + if not manual and (type == 'rects-dlib' or type == 'rects-mt' ): + for i in range ( int (max (1, dev_vram / 2) ) ): + result += [ (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) ] + else: + result += [ (idx, 'GPU', dev_name, dev_vram) ] + + return result + + if cpu_only: + if manual: + return [ (0, 'CPU', 'CPU', 0 ) ] + else: + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] + + elif type == 'final': + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range(min(8, multiprocessing.cpu_count())) ] + +class DeletedFilesSearcherSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.debug_paths_stems = client_dict['debug_paths_stems'] + return None + + #override + def process_data(self, data): + input_path_stem = Path(data[0]).stem + return any ( [ input_path_stem == d_stem for d_stem in self.debug_paths_stems] ) + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[0] + + #override + def __init__(self, input_paths, debug_paths ): + self.input_paths = input_paths + self.debug_paths_stems = [ Path(d).stem for d in debug_paths] + self.result = [] + super().__init__('DeletedFilesSearcherSubprocessor', DeletedFilesSearcherSubprocessor.Cli, 60) + + #override + def process_info_generator(self): + for i in range(min(multiprocessing.cpu_count(), 8)): + yield 'CPU%d' % (i), {}, {'debug_paths_stems' : self.debug_paths_stems} + + 
#override
+    def on_clients_initialized(self):
+        io.progress_bar ("Searching deleted files", len (self.input_paths))
+
+    #override
+    def on_clients_finalized(self):
+        io.progress_bar_close()
+
+    #override
+    def get_data(self, host_dict):
+        if len (self.input_paths) > 0:
+            return [self.input_paths.pop(0)]
+        return None
+
+    #override
+    def on_data_return (self, host_dict, data):
+        self.input_paths.insert(0, data[0])
+
+    #override
+    def on_result (self, host_dict, data, result):
+        if not result:
+            self.result.append( data[0] )
+        io.progress_bar_inc(1)
+
+    #override
+    def get_result(self):
+        return self.result
+
+
+#currently unused
+def extract_fanseg(input_dir, device_args={} ):
+    multi_gpu = device_args.get('multi_gpu', False)
+    cpu_only = device_args.get('cpu_only', False)
+
+    input_path = Path(input_dir)
+    if not input_path.exists():
+        raise ValueError('Input directory not found. Please ensure it exists.')
+
+    paths_to_extract = []
+    for filename in Path_utils.get_image_paths(input_path) :
+        filepath = Path(filename)
+        if filepath.suffix == '.png':
+            dflimg = DFLPNG.load( str(filepath) )
+        elif filepath.suffix == '.jpg':
+            dflimg = DFLJPG.load ( str(filepath) )
+        else:
+            dflimg = None
+
+        if dflimg is not None:
+            paths_to_extract.append (filepath)
+
+    paths_to_extract_len = len(paths_to_extract)
+    if paths_to_extract_len > 0:
+        io.log_info ("Performing fanseg extraction for %d files..." % (paths_to_extract_len) )
+        data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in paths_to_extract ], 'fanseg', multi_gpu=multi_gpu, cpu_only=cpu_only).run()
+
+def extract_umd_csv(input_file_csv,
+                    image_size=256,
+                    face_type='full_face',
+                    device_args={} ):
+
+    # extract faces from the umdfaces.io dataset csv file, which carries pitch, yaw, roll info
+    multi_gpu = device_args.get('multi_gpu', False)
+    cpu_only = device_args.get('cpu_only', False)
+    face_type = FaceType.fromString(face_type)
+
+    input_file_csv_path = Path(input_file_csv)
+    if not input_file_csv_path.exists():
+        raise ValueError('input_file_csv not found. Please ensure it exists.')
+
+    input_file_csv_root_path = input_file_csv_path.parent
+    output_path = input_file_csv_path.parent / ('aligned_' + input_file_csv_path.name)
+
+    io.log_info("Output dir is %s." % (str(output_path)) )
+
+    if output_path.exists():
+        output_images_paths = Path_utils.get_image_paths(output_path)
+        if len(output_images_paths) > 0:
+            io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." % (str(output_path)), False )
+            for filename in output_images_paths:
+                Path(filename).unlink()
+    else:
+        output_path.mkdir(parents=True, exist_ok=True)
+
+    try:
+        with open( str(input_file_csv_path), 'r') as f:
+            csv_file = f.read()
+    except Exception as e:
+        io.log_err("Unable to open or read file " + str(input_file_csv_path) + ": " + str(e) )
+        return
+
+    strings = csv_file.split('\n')
+    keys = strings[0].split(',')
+    keys_len = len(keys)
+    csv_data = []
+    for i in range(1, len(strings)):
+        values = strings[i].split(',')
+        if keys_len != len(values):
+            io.log_err("Malformed row in csv file, skipping.")
+            continue
+
+        csv_data += [ { keys[n] : values[n] for n in range(keys_len) } ]
+
+    data = []
+    for d in csv_data:
+        filename = input_file_csv_root_path / d['FILE']
+
+        pitch, yaw, roll = float(d['PITCH']), float(d['YAW']), float(d['ROLL'])
+        if pitch < -90 or pitch > 90 or yaw < -90 or yaw > 90 or roll < -90 or roll > 90:
+            continue
+
+        pitch_yaw_roll = pitch/90.0, yaw/90.0, roll/90.0
+
+        x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT'])
+
+        data += [ ExtractSubprocessor.Data(filename=filename, rects=[ [x,y,x+w,y+h] ], pitch_yaw_roll=pitch_yaw_roll) ]
+
+    images_found = len(data)
+    faces_detected = 0
+    if len(data) > 0:
+        io.log_info ("Performing 2nd pass from csv file...")
+        data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run()
+
+        io.log_info ('Performing 3rd pass...')
+        data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+        faces_detected += sum([d.faces_detected for d in data])
+
+
+    io.log_info ('-------------------------')
+    io.log_info ('Images found: %d' % (images_found) )
+    io.log_info ('Faces detected: %d' % (faces_detected) )
+    io.log_info ('-------------------------')
+
+def main(input_dir,
+         output_dir,
+         debug_dir=None,
+         detector='mt',
+         manual_fix=False,
+         manual_output_debug_fix=False,
+         manual_window_size=1368,
+         image_size=256,
+         face_type='full_face',
+         device_args={}):
+
+    input_path = Path(input_dir)
+    output_path = Path(output_dir)
+    face_type = FaceType.fromString(face_type)
+
+    multi_gpu = device_args.get('multi_gpu', False)
+    cpu_only = device_args.get('cpu_only', False)
+
+    if not input_path.exists():
+        raise ValueError('Input directory not found. Please ensure it exists.')
+
+    if output_path.exists():
+        if not manual_output_debug_fix and input_path != output_path:
+            output_images_paths = Path_utils.get_image_paths(output_path)
+            if len(output_images_paths) > 0:
+                io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." % (str(output_path)), False )
+                for filename in output_images_paths:
+                    Path(filename).unlink()
+    else:
+        output_path.mkdir(parents=True, exist_ok=True)
+
+    if manual_output_debug_fix:
+        if debug_dir is None:
+            raise ValueError('debug-dir must be specified')
+        detector = 'manual'
+        io.log_info('Re-extracting frames that were deleted from the _debug directory.')
+
+    input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info)
+    if debug_dir is not None:
+        debug_output_path = Path(debug_dir)
+
+        if manual_output_debug_fix:
+            if not debug_output_path.exists():
+                raise ValueError("%s not found" % ( str(debug_output_path) ))
+
+            input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, Path_utils.get_image_paths(debug_output_path) ).run()
+            input_path_image_paths = sorted (input_path_image_paths)
+            io.log_info('Found %d images.' % (len(input_path_image_paths)))
+        else:
+            if debug_output_path.exists():
+                for filename in Path_utils.get_image_paths(debug_output_path):
+                    Path(filename).unlink()
+            else:
+                debug_output_path.mkdir(parents=True, exist_ok=True)
+
+    images_found = len(input_path_image_paths)
+    faces_detected = 0
+    if images_found != 0:
+        if detector == 'manual':
+            io.log_info ('Performing manual extract...')
+            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run()
+        else:
+            io.log_info ('Performing 1st pass...')
+            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
+
+            io.log_info ('Performing 2nd pass...')
+            data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
+
+        io.log_info ('Performing 3rd pass...')
+        data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+        faces_detected += sum([d.faces_detected for d in data])
+
+        if manual_fix:
+            if all (d.faces_detected > 0 for d in data):
+                io.log_info ('All faces are detected, manual fix not needed.')
+            else:
+                fix_data = [ ExtractSubprocessor.Data(d.filename) for d in data if d.faces_detected == 0 ]
+                io.log_info ('Performing manual fix for %d images...'
% (len(fix_data)) ) + fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run() + fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + faces_detected += sum([d.faces_detected for d in fix_data]) + + + io.log_info ('-------------------------') + io.log_info ('Images found: %d' % (images_found) ) + io.log_info ('Faces detected: %d' % (faces_detected) ) + io.log_info ('-------------------------') diff --git a/mainscripts/MaskEditorTool.py b/mainscripts/MaskEditorTool.py index 1d9750f..3f63375 100644 --- a/mainscripts/MaskEditorTool.py +++ b/mainscripts/MaskEditorTool.py @@ -1,556 +1,556 @@ -import os -import sys -import time -import traceback -from pathlib import Path - -import cv2 -import numpy as np -import numpy.linalg as npl - -import imagelib -from facelib import LandmarksProcessor -from imagelib import IEPolys -from interact import interact as io -from utils import Path_utils -from utils.cv2_utils import * -from utils.DFLJPG import DFLJPG -from utils.DFLPNG import DFLPNG - -class MaskEditor: - STATE_NONE=0 - STATE_MASKING=1 - - def __init__(self, img, prev_images, next_images, mask=None, ie_polys=None, get_status_lines_func=None): - self.img = imagelib.normalize_channels (img,3) - h, w, c = img.shape - - if h != w and w != 256: - #to support any square res, scale img,mask and ie_polys to 256, then scale ie_polys back on .get_ie_polys() - raise Exception ("MaskEditor does not support image size != 256x256") - - ph, pw = h // 4, w // 4 #pad wh - - self.prev_images = prev_images - self.next_images = next_images - - if mask is not None: - self.mask = imagelib.normalize_channels (mask,3) - else: - self.mask = np.zeros ( (h,w,3) ) - self.get_status_lines_func = get_status_lines_func - - self.state_prop = self.STATE_NONE - - self.w, self.h = w, h - self.pw, self.ph = pw, ph - self.pwh = np.array([self.pw, self.ph]) - self.pwh2 = np.array([self.pw*2, self.ph*2]) - self.sw, self.sh = w+pw*2, h+ph*2 - self.prwh = 64 #preview wh - - if ie_polys is None: - ie_polys = IEPolys() - self.ie_polys = ie_polys - - self.polys_mask = None - self.preview_images = None - - self.mouse_x = self.mouse_y = 9999 - self.screen_status_block = None - self.screen_status_block_dirty = True - self.screen_changed = True - - def set_state(self, state): - self.state = state - - @property - def state(self): - return self.state_prop - - @state.setter - def state(self, value): - self.state_prop = value - if value == self.STATE_MASKING: - self.ie_polys.dirty = True - - def get_mask(self): - if self.ie_polys.switch_dirty(): - self.screen_status_block_dirty = True - self.ie_mask = img = self.mask.copy() - - self.ie_polys.overlay_mask(img) - - return img - return self.ie_mask - - def get_screen_overlay(self): - img = np.zeros ( (self.sh, self.sw, 3) ) - - if self.state == self.STATE_MASKING: - mouse_xy = self.mouse_xy.copy() + self.pwh - l = self.ie_polys.n_list() - if l.n > 0: - p = l.cur_point().copy() + self.pwh - color = (0,1,0) if l.type == 1 else (0,0,1) - cv2.line(img, tuple(p), tuple(mouse_xy), color ) - - return img - - def undo_to_begin_point(self): - while not self.undo_point(): - pass - - def undo_point(self): - self.screen_changed = True - if self.state == self.STATE_NONE: - if self.ie_polys.n > 0: - self.state = self.STATE_MASKING - - if self.state == self.STATE_MASKING: - if self.ie_polys.n_list().n_dec() 
== 0 and \ - self.ie_polys.n_dec() == 0: - self.state = self.STATE_NONE - else: - return False - - return True - - def redo_to_end_point(self): - while not self.redo_point(): - pass - - def redo_point(self): - self.screen_changed = True - if self.state == self.STATE_NONE: - if self.ie_polys.n_max > 0: - self.state = self.STATE_MASKING - if self.ie_polys.n == 0: - self.ie_polys.n_inc() - - if self.state == self.STATE_MASKING: - while True: - l = self.ie_polys.n_list() - if l.n_inc() == l.n_max: - if self.ie_polys.n == self.ie_polys.n_max: - break - self.ie_polys.n_inc() - else: - return False - - return True - - def combine_screens(self, screens): - - screens_len = len(screens) - - new_screens = [] - for screen, padded_overlay in screens: - screen_img = np.zeros( (self.sh, self.sw, 3), dtype=np.float32 ) - - screen = imagelib.normalize_channels (screen, 3) - h,w,c = screen.shape - - screen_img[self.ph:-self.ph, self.pw:-self.pw, :] = screen - - if padded_overlay is not None: - screen_img = screen_img + padded_overlay - - screen_img = np.clip(screen_img*255, 0, 255).astype(np.uint8) - new_screens.append(screen_img) - - return np.concatenate (new_screens, axis=1) - - def get_screen_status_block(self, w, c): - if self.screen_status_block_dirty: - self.screen_status_block_dirty = False - lines = [ - 'Polys current/max = %d/%d' % (self.ie_polys.n, self.ie_polys.n_max), - ] - if self.get_status_lines_func is not None: - lines += self.get_status_lines_func() - - lines_count = len(lines) - - - h_line = 21 - h = lines_count * h_line - img = np.ones ( (h,w,c) ) * 0.1 - - for i in range(lines_count): - img[ i*h_line:(i+1)*h_line, 0:w] += \ - imagelib.get_text_image ( (h_line,w,c), lines[i], color=[0.8]*c ) - - self.screen_status_block = np.clip(img*255, 0, 255).astype(np.uint8) - - return self.screen_status_block - - def set_screen_status_block_dirty(self): - self.screen_status_block_dirty = True - - def set_screen_changed(self): - self.screen_changed = True - - def switch_screen_changed(self): - result = self.screen_changed - self.screen_changed = False - return result - - def make_screen(self): - screen_overlay = self.get_screen_overlay() - final_mask = self.get_mask() - - masked_img = self.img*final_mask*0.5 + self.img*(1-final_mask) - - pink = np.full ( (self.h, self.w, 3), (1,0,1) ) - pink_masked_img = self.img*final_mask + pink*(1-final_mask) - - - - - screens = [ (self.img, screen_overlay), - (masked_img, screen_overlay), - (pink_masked_img, screen_overlay), - ] - screens = self.combine_screens(screens) - - if self.preview_images is None: - sh,sw,sc = screens.shape - - prh, prw = self.prwh, self.prwh - - total_w = sum ([ img.shape[1] for (t,img) in self.prev_images ]) + \ - sum ([ img.shape[1] for (t,img) in self.next_images ]) - - total_images_len = len(self.prev_images) + len(self.next_images) - - max_hor_images_count = sw // prw - max_side_images_count = (max_hor_images_count - 1) // 2 - - prev_images = self.prev_images[-max_side_images_count:] - next_images = self.next_images[:max_side_images_count] - - border = 2 - - max_wh_bordered = (prw-border*2, prh-border*2) - - prev_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in prev_images ] - next_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in next_images ] - - for images in [prev_images, next_images]: - for i, (t, img) in enumerate(images): - new_img = np.zeros ( (prh,prw, sc) ) - new_img[border:-border,border:-border] = img - - if t == 2: - cv2.line 
(new_img, ( prw//2, int(prh//1.5) ), (int(prw/1.5), prh ) , (0,1,0), thickness=2 ) - cv2.line (new_img, ( int(prw/1.5), prh ), ( prw, prh // 2 ) , (0,1,0), thickness=2 ) - elif t == 1: - cv2.line (new_img, ( prw//2, prh//2 ), ( prw, prh ) , (0,0,1), thickness=2 ) - cv2.line (new_img, ( prw//2, prh ), ( prw, prh // 2 ) , (0,0,1), thickness=2 ) - - images[i] = new_img - - - preview_images = [] - if len(prev_images) > 0: - preview_images += [ np.concatenate (prev_images, axis=1) ] - - img = np.full ( (prh,prw, sc), (0,0,1), dtype=np.float ) - img[border:-border,border:-border] = cv2.resize( self.img, max_wh_bordered ) - - preview_images += [ img ] - - if len(next_images) > 0: - preview_images += [ np.concatenate (next_images, axis=1) ] - - preview_images = np.concatenate ( preview_images, axis=1 ) - - left_pad = sw // 2 - len(prev_images) * prw - prw // 2 - right_pad = sw // 2 - len(next_images) * prw - prw // 2 - - preview_images = np.concatenate ([np.zeros ( (preview_images.shape[0], left_pad, preview_images.shape[2]) ), - preview_images, - np.zeros ( (preview_images.shape[0], right_pad, preview_images.shape[2]) ) - ], axis=1) - self.preview_images = np.clip(preview_images * 255, 0, 255 ).astype(np.uint8) - - status_img = self.get_screen_status_block( screens.shape[1], screens.shape[2] ) - - result = np.concatenate ( [self.preview_images, screens, status_img], axis=0 ) - - return result - - def mask_finish(self, n_clip=True): - if self.state == self.STATE_MASKING: - self.screen_changed = True - if self.ie_polys.n_list().n <= 2: - self.ie_polys.n_dec() - self.state = self.STATE_NONE - if n_clip: - self.ie_polys.n_clip() - - def set_mouse_pos(self,x,y): - if self.preview_images is not None: - y -= self.preview_images.shape[0] - - mouse_x = x % (self.sw) - self.pw - mouse_y = y % (self.sh) - self.ph - - - - if mouse_x != self.mouse_x or mouse_y != self.mouse_y: - self.mouse_xy = np.array( [mouse_x, mouse_y] ) - self.mouse_x, self.mouse_y = self.mouse_xy - self.screen_changed = True - - def mask_point(self, type): - self.screen_changed = True - if self.state == self.STATE_MASKING and \ - self.ie_polys.n_list().type != type: - self.mask_finish() - - elif self.state == self.STATE_NONE: - self.state = self.STATE_MASKING - self.ie_polys.add(type) - - if self.state == self.STATE_MASKING: - self.ie_polys.n_list().add (self.mouse_x, self.mouse_y) - - def get_ie_polys(self): - return self.ie_polys - -def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None): - input_path = Path(input_dir) - - confirmed_path = Path(confirmed_dir) - skipped_path = Path(skipped_dir) - - if not input_path.exists(): - raise ValueError('Input directory not found. 
Please ensure it exists.') - - if not confirmed_path.exists(): - confirmed_path.mkdir(parents=True) - - if not skipped_path.exists(): - skipped_path.mkdir(parents=True) - - wnd_name = "MaskEditor tool" - io.named_window (wnd_name) - io.capture_mouse(wnd_name) - io.capture_keys(wnd_name) - - cached_images = {} - - image_paths = [ Path(x) for x in Path_utils.get_image_paths(input_path)] - done_paths = [] - done_images_types = {} - image_paths_total = len(image_paths) - - zoom_factor = 1.0 - preview_images_count = 9 - target_wh = 256 - - do_prev_count = 0 - do_save_move_count = 0 - do_save_count = 0 - do_skip_move_count = 0 - do_skip_count = 0 - - def jobs_count(): - return do_prev_count + do_save_move_count + do_save_count + do_skip_move_count + do_skip_count - - is_exit = False - while not is_exit: - - if len(image_paths) > 0: - filepath = image_paths.pop(0) - else: - filepath = None - - next_image_paths = image_paths[0:preview_images_count] - next_image_paths_names = [ path.name for path in next_image_paths ] - prev_image_paths = done_paths[-preview_images_count:] - prev_image_paths_names = [ path.name for path in prev_image_paths ] - - for key in list( cached_images.keys() ): - if key not in prev_image_paths_names and \ - key not in next_image_paths_names: - cached_images.pop(key) - - for paths in [prev_image_paths, next_image_paths]: - for path in paths: - if path.name not in cached_images: - cached_images[path.name] = cv2_imread(str(path)) / 255.0 - - if filepath is not None: - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - else: - lmrks = dflimg.get_landmarks() - ie_polys = dflimg.get_ie_polys() - fanseg_mask = dflimg.get_fanseg_mask() - - if filepath.name in cached_images: - img = cached_images[filepath.name] - else: - img = cached_images[filepath.name] = cv2_imread(str(filepath)) / 255.0 - - if fanseg_mask is not None: - mask = fanseg_mask - else: - mask = LandmarksProcessor.get_image_hull_mask( img.shape, lmrks) - else: - img = np.zeros ( (target_wh,target_wh,3) ) - mask = np.ones ( (target_wh,target_wh,3) ) - ie_polys = None - - def get_status_lines_func(): - return ['Progress: %d / %d . Current file: %s' % (len(done_paths), image_paths_total, str(filepath.name) if filepath is not None else "end" ), - '[Left mouse button] - mark include mask.', - '[Right mouse button] - mark exclude mask.', - '[Middle mouse button] - finish current poly.', - '[Mouse wheel] - undo/redo poly or point. [+ctrl] - undo to begin/redo to end', - '[q] - prev image. [w] - skip and move to %s. [e] - save and move to %s. ' % (skipped_path.name, confirmed_path.name), - '[z] - prev image. [x] - skip. [c] - save. 
', - 'hold [shift] - speed up the frame counter by 10.', - '[-/+] - window zoom [esc] - quit', - ] - - try: - ed = MaskEditor(img, - [ (done_images_types[name], cached_images[name]) for name in prev_image_paths_names ], - [ (0, cached_images[name]) for name in next_image_paths_names ], - mask, ie_polys, get_status_lines_func) - except Exception as e: - print(e) - continue - - next = False - while not next: - io.process_messages(0.005) - - if jobs_count() == 0: - for (x,y,ev,flags) in io.get_mouse_events(wnd_name): - x, y = int (x / zoom_factor), int(y / zoom_factor) - ed.set_mouse_pos(x, y) - if filepath is not None: - if ev == io.EVENT_LBUTTONDOWN: - ed.mask_point(1) - elif ev == io.EVENT_RBUTTONDOWN: - ed.mask_point(0) - elif ev == io.EVENT_MBUTTONDOWN: - ed.mask_finish() - elif ev == io.EVENT_MOUSEWHEEL: - if flags & 0x80000000 != 0: - if flags & 0x8 != 0: - ed.undo_to_begin_point() - else: - ed.undo_point() - else: - if flags & 0x8 != 0: - ed.redo_to_end_point() - else: - ed.redo_point() - - for key, chr_key, ctrl_pressed, alt_pressed, shift_pressed in io.get_key_events(wnd_name): - if chr_key == 'q' or chr_key == 'z': - do_prev_count = 1 if not shift_pressed else 10 - elif chr_key == '-': - zoom_factor = np.clip (zoom_factor-0.1, 0.1, 4.0) - ed.set_screen_changed() - elif chr_key == '+': - zoom_factor = np.clip (zoom_factor+0.1, 0.1, 4.0) - ed.set_screen_changed() - elif key == 27: #esc - is_exit = True - next = True - break - elif filepath is not None: - if chr_key == 'e': - do_save_move_count = 1 if not shift_pressed else 10 - elif chr_key == 'c': - do_save_count = 1 if not shift_pressed else 10 - elif chr_key == 'w': - do_skip_move_count = 1 if not shift_pressed else 10 - elif chr_key == 'x': - do_skip_count = 1 if not shift_pressed else 10 - - if do_prev_count > 0: - do_prev_count -= 1 - if len(done_paths) > 0: - if filepath is not None: - image_paths.insert(0, filepath) - - filepath = done_paths.pop(-1) - done_images_types[filepath.name] = 0 - - if filepath.parent != input_path: - new_filename_path = input_path / filepath.name - filepath.rename ( new_filename_path ) - image_paths.insert(0, new_filename_path) - else: - image_paths.insert(0, filepath) - - next = True - elif filepath is not None: - if do_save_move_count > 0: - do_save_move_count -= 1 - - ed.mask_finish() - dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) - - done_paths += [ confirmed_path / filepath.name ] - done_images_types[filepath.name] = 2 - filepath.rename(done_paths[-1]) - - next = True - elif do_save_count > 0: - do_save_count -= 1 - - ed.mask_finish() - dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) - - done_paths += [ filepath ] - done_images_types[filepath.name] = 2 - - next = True - elif do_skip_move_count > 0: - do_skip_move_count -= 1 - - done_paths += [ skipped_path / filepath.name ] - done_images_types[filepath.name] = 1 - filepath.rename(done_paths[-1]) - - next = True - elif do_skip_count > 0: - do_skip_count -= 1 - - done_paths += [ filepath ] - done_images_types[filepath.name] = 1 - - next = True - else: - do_save_move_count = do_save_count = do_skip_move_count = do_skip_count = 0 - - if jobs_count() == 0: - if ed.switch_screen_changed(): - screen = ed.make_screen() - if zoom_factor != 1.0: - h,w,c = screen.shape - screen = cv2.resize ( screen, ( int(w*zoom_factor), int(h*zoom_factor) ) ) - io.show_image (wnd_name, screen ) - - - io.process_messages(0.005) - - io.destroy_all_windows() - +import os +import sys +import time +import traceback +from pathlib import 
Path + +import cv2 +import numpy as np +import numpy.linalg as npl + +import imagelib +from facelib import LandmarksProcessor +from imagelib import IEPolys +from interact import interact as io +from utils import Path_utils +from utils.cv2_utils import * +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG + +class MaskEditor: + STATE_NONE=0 + STATE_MASKING=1 + + def __init__(self, img, prev_images, next_images, mask=None, ie_polys=None, get_status_lines_func=None): + self.img = imagelib.normalize_channels (img,3) + h, w, c = img.shape + + if h != w and w != 256: + #to support any square res, scale img,mask and ie_polys to 256, then scale ie_polys back on .get_ie_polys() + raise Exception ("MaskEditor does not support image size != 256x256") + + ph, pw = h // 4, w // 4 #pad wh + + self.prev_images = prev_images + self.next_images = next_images + + if mask is not None: + self.mask = imagelib.normalize_channels (mask,3) + else: + self.mask = np.zeros ( (h,w,3) ) + self.get_status_lines_func = get_status_lines_func + + self.state_prop = self.STATE_NONE + + self.w, self.h = w, h + self.pw, self.ph = pw, ph + self.pwh = np.array([self.pw, self.ph]) + self.pwh2 = np.array([self.pw*2, self.ph*2]) + self.sw, self.sh = w+pw*2, h+ph*2 + self.prwh = 64 #preview wh + + if ie_polys is None: + ie_polys = IEPolys() + self.ie_polys = ie_polys + + self.polys_mask = None + self.preview_images = None + + self.mouse_x = self.mouse_y = 9999 + self.screen_status_block = None + self.screen_status_block_dirty = True + self.screen_changed = True + + def set_state(self, state): + self.state = state + + @property + def state(self): + return self.state_prop + + @state.setter + def state(self, value): + self.state_prop = value + if value == self.STATE_MASKING: + self.ie_polys.dirty = True + + def get_mask(self): + if self.ie_polys.switch_dirty(): + self.screen_status_block_dirty = True + self.ie_mask = img = self.mask.copy() + + self.ie_polys.overlay_mask(img) + + return img + return self.ie_mask + + def get_screen_overlay(self): + img = np.zeros ( (self.sh, self.sw, 3) ) + + if self.state == self.STATE_MASKING: + mouse_xy = self.mouse_xy.copy() + self.pwh + l = self.ie_polys.n_list() + if l.n > 0: + p = l.cur_point().copy() + self.pwh + color = (0,1,0) if l.type == 1 else (0,0,1) + cv2.line(img, tuple(p), tuple(mouse_xy), color ) + + return img + + def undo_to_begin_point(self): + while not self.undo_point(): + pass + + def undo_point(self): + self.screen_changed = True + if self.state == self.STATE_NONE: + if self.ie_polys.n > 0: + self.state = self.STATE_MASKING + + if self.state == self.STATE_MASKING: + if self.ie_polys.n_list().n_dec() == 0 and \ + self.ie_polys.n_dec() == 0: + self.state = self.STATE_NONE + else: + return False + + return True + + def redo_to_end_point(self): + while not self.redo_point(): + pass + + def redo_point(self): + self.screen_changed = True + if self.state == self.STATE_NONE: + if self.ie_polys.n_max > 0: + self.state = self.STATE_MASKING + if self.ie_polys.n == 0: + self.ie_polys.n_inc() + + if self.state == self.STATE_MASKING: + while True: + l = self.ie_polys.n_list() + if l.n_inc() == l.n_max: + if self.ie_polys.n == self.ie_polys.n_max: + break + self.ie_polys.n_inc() + else: + return False + + return True + + def combine_screens(self, screens): + + screens_len = len(screens) + + new_screens = [] + for screen, padded_overlay in screens: + screen_img = np.zeros( (self.sh, self.sw, 3), dtype=np.float32 ) + + screen = imagelib.normalize_channels (screen, 3) + h,w,c = 
screen.shape + + screen_img[self.ph:-self.ph, self.pw:-self.pw, :] = screen + + if padded_overlay is not None: + screen_img = screen_img + padded_overlay + + screen_img = np.clip(screen_img*255, 0, 255).astype(np.uint8) + new_screens.append(screen_img) + + return np.concatenate (new_screens, axis=1) + + def get_screen_status_block(self, w, c): + if self.screen_status_block_dirty: + self.screen_status_block_dirty = False + lines = [ + 'Polys current/max = %d/%d' % (self.ie_polys.n, self.ie_polys.n_max), + ] + if self.get_status_lines_func is not None: + lines += self.get_status_lines_func() + + lines_count = len(lines) + + + h_line = 21 + h = lines_count * h_line + img = np.ones ( (h,w,c) ) * 0.1 + + for i in range(lines_count): + img[ i*h_line:(i+1)*h_line, 0:w] += \ + imagelib.get_text_image ( (h_line,w,c), lines[i], color=[0.8]*c ) + + self.screen_status_block = np.clip(img*255, 0, 255).astype(np.uint8) + + return self.screen_status_block + + def set_screen_status_block_dirty(self): + self.screen_status_block_dirty = True + + def set_screen_changed(self): + self.screen_changed = True + + def switch_screen_changed(self): + result = self.screen_changed + self.screen_changed = False + return result + + def make_screen(self): + screen_overlay = self.get_screen_overlay() + final_mask = self.get_mask() + + masked_img = self.img*final_mask*0.5 + self.img*(1-final_mask) + + pink = np.full ( (self.h, self.w, 3), (1,0,1) ) + pink_masked_img = self.img*final_mask + pink*(1-final_mask) + + + + + screens = [ (self.img, screen_overlay), + (masked_img, screen_overlay), + (pink_masked_img, screen_overlay), + ] + screens = self.combine_screens(screens) + + if self.preview_images is None: + sh,sw,sc = screens.shape + + prh, prw = self.prwh, self.prwh + + total_w = sum ([ img.shape[1] for (t,img) in self.prev_images ]) + \ + sum ([ img.shape[1] for (t,img) in self.next_images ]) + + total_images_len = len(self.prev_images) + len(self.next_images) + + max_hor_images_count = sw // prw + max_side_images_count = (max_hor_images_count - 1) // 2 + + prev_images = self.prev_images[-max_side_images_count:] + next_images = self.next_images[:max_side_images_count] + + border = 2 + + max_wh_bordered = (prw-border*2, prh-border*2) + + prev_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in prev_images ] + next_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in next_images ] + + for images in [prev_images, next_images]: + for i, (t, img) in enumerate(images): + new_img = np.zeros ( (prh,prw, sc) ) + new_img[border:-border,border:-border] = img + + if t == 2: + cv2.line (new_img, ( prw//2, int(prh//1.5) ), (int(prw/1.5), prh ) , (0,1,0), thickness=2 ) + cv2.line (new_img, ( int(prw/1.5), prh ), ( prw, prh // 2 ) , (0,1,0), thickness=2 ) + elif t == 1: + cv2.line (new_img, ( prw//2, prh//2 ), ( prw, prh ) , (0,0,1), thickness=2 ) + cv2.line (new_img, ( prw//2, prh ), ( prw, prh // 2 ) , (0,0,1), thickness=2 ) + + images[i] = new_img + + + preview_images = [] + if len(prev_images) > 0: + preview_images += [ np.concatenate (prev_images, axis=1) ] + + img = np.full ( (prh,prw, sc), (0,0,1), dtype=np.float ) + img[border:-border,border:-border] = cv2.resize( self.img, max_wh_bordered ) + + preview_images += [ img ] + + if len(next_images) > 0: + preview_images += [ np.concatenate (next_images, axis=1) ] + + preview_images = np.concatenate ( preview_images, axis=1 ) + + left_pad = sw // 2 - len(prev_images) * prw - prw // 2 + right_pad = 
sw // 2 - len(next_images) * prw - prw // 2 + + preview_images = np.concatenate ([np.zeros ( (preview_images.shape[0], left_pad, preview_images.shape[2]) ), + preview_images, + np.zeros ( (preview_images.shape[0], right_pad, preview_images.shape[2]) ) + ], axis=1) + self.preview_images = np.clip(preview_images * 255, 0, 255 ).astype(np.uint8) + + status_img = self.get_screen_status_block( screens.shape[1], screens.shape[2] ) + + result = np.concatenate ( [self.preview_images, screens, status_img], axis=0 ) + + return result + + def mask_finish(self, n_clip=True): + if self.state == self.STATE_MASKING: + self.screen_changed = True + if self.ie_polys.n_list().n <= 2: + self.ie_polys.n_dec() + self.state = self.STATE_NONE + if n_clip: + self.ie_polys.n_clip() + + def set_mouse_pos(self,x,y): + if self.preview_images is not None: + y -= self.preview_images.shape[0] + + mouse_x = x % (self.sw) - self.pw + mouse_y = y % (self.sh) - self.ph + + + + if mouse_x != self.mouse_x or mouse_y != self.mouse_y: + self.mouse_xy = np.array( [mouse_x, mouse_y] ) + self.mouse_x, self.mouse_y = self.mouse_xy + self.screen_changed = True + + def mask_point(self, type): + self.screen_changed = True + if self.state == self.STATE_MASKING and \ + self.ie_polys.n_list().type != type: + self.mask_finish() + + elif self.state == self.STATE_NONE: + self.state = self.STATE_MASKING + self.ie_polys.add(type) + + if self.state == self.STATE_MASKING: + self.ie_polys.n_list().add (self.mouse_x, self.mouse_y) + + def get_ie_polys(self): + return self.ie_polys + +def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None): + input_path = Path(input_dir) + + confirmed_path = Path(confirmed_dir) + skipped_path = Path(skipped_dir) + + if not input_path.exists(): + raise ValueError('Input directory not found. 
Please ensure it exists.') + + if not confirmed_path.exists(): + confirmed_path.mkdir(parents=True) + + if not skipped_path.exists(): + skipped_path.mkdir(parents=True) + + wnd_name = "MaskEditor tool" + io.named_window (wnd_name) + io.capture_mouse(wnd_name) + io.capture_keys(wnd_name) + + cached_images = {} + + image_paths = [ Path(x) for x in Path_utils.get_image_paths(input_path)] + done_paths = [] + done_images_types = {} + image_paths_total = len(image_paths) + + zoom_factor = 1.0 + preview_images_count = 9 + target_wh = 256 + + do_prev_count = 0 + do_save_move_count = 0 + do_save_count = 0 + do_skip_move_count = 0 + do_skip_count = 0 + + def jobs_count(): + return do_prev_count + do_save_move_count + do_save_count + do_skip_move_count + do_skip_count + + is_exit = False + while not is_exit: + + if len(image_paths) > 0: + filepath = image_paths.pop(0) + else: + filepath = None + + next_image_paths = image_paths[0:preview_images_count] + next_image_paths_names = [ path.name for path in next_image_paths ] + prev_image_paths = done_paths[-preview_images_count:] + prev_image_paths_names = [ path.name for path in prev_image_paths ] + + for key in list( cached_images.keys() ): + if key not in prev_image_paths_names and \ + key not in next_image_paths_names: + cached_images.pop(key) + + for paths in [prev_image_paths, next_image_paths]: + for path in paths: + if path.name not in cached_images: + cached_images[path.name] = cv2_imread(str(path)) / 255.0 + + if filepath is not None: + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + else: + lmrks = dflimg.get_landmarks() + ie_polys = dflimg.get_ie_polys() + fanseg_mask = dflimg.get_fanseg_mask() + + if filepath.name in cached_images: + img = cached_images[filepath.name] + else: + img = cached_images[filepath.name] = cv2_imread(str(filepath)) / 255.0 + + if fanseg_mask is not None: + mask = fanseg_mask + else: + mask = LandmarksProcessor.get_image_hull_mask( img.shape, lmrks) + else: + img = np.zeros ( (target_wh,target_wh,3) ) + mask = np.ones ( (target_wh,target_wh,3) ) + ie_polys = None + + def get_status_lines_func(): + return ['Progress: %d / %d . Current file: %s' % (len(done_paths), image_paths_total, str(filepath.name) if filepath is not None else "end" ), + '[Left mouse button] - mark include mask.', + '[Right mouse button] - mark exclude mask.', + '[Middle mouse button] - finish current poly.', + '[Mouse wheel] - undo/redo poly or point. [+ctrl] - undo to begin/redo to end', + '[q] - prev image. [w] - skip and move to %s. [e] - save and move to %s. ' % (skipped_path.name, confirmed_path.name), + '[z] - prev image. [x] - skip. [c] - save. 
', + 'hold [shift] - speed up the frame counter by 10.', + '[-/+] - window zoom [esc] - quit', + ] + + try: + ed = MaskEditor(img, + [ (done_images_types[name], cached_images[name]) for name in prev_image_paths_names ], + [ (0, cached_images[name]) for name in next_image_paths_names ], + mask, ie_polys, get_status_lines_func) + except Exception as e: + print(e) + continue + + next = False + while not next: + io.process_messages(0.005) + + if jobs_count() == 0: + for (x,y,ev,flags) in io.get_mouse_events(wnd_name): + x, y = int (x / zoom_factor), int(y / zoom_factor) + ed.set_mouse_pos(x, y) + if filepath is not None: + if ev == io.EVENT_LBUTTONDOWN: + ed.mask_point(1) + elif ev == io.EVENT_RBUTTONDOWN: + ed.mask_point(0) + elif ev == io.EVENT_MBUTTONDOWN: + ed.mask_finish() + elif ev == io.EVENT_MOUSEWHEEL: + if flags & 0x80000000 != 0: + if flags & 0x8 != 0: + ed.undo_to_begin_point() + else: + ed.undo_point() + else: + if flags & 0x8 != 0: + ed.redo_to_end_point() + else: + ed.redo_point() + + for key, chr_key, ctrl_pressed, alt_pressed, shift_pressed in io.get_key_events(wnd_name): + if chr_key == 'q' or chr_key == 'z': + do_prev_count = 1 if not shift_pressed else 10 + elif chr_key == '-': + zoom_factor = np.clip (zoom_factor-0.1, 0.1, 4.0) + ed.set_screen_changed() + elif chr_key == '+': + zoom_factor = np.clip (zoom_factor+0.1, 0.1, 4.0) + ed.set_screen_changed() + elif key == 27: #esc + is_exit = True + next = True + break + elif filepath is not None: + if chr_key == 'e': + do_save_move_count = 1 if not shift_pressed else 10 + elif chr_key == 'c': + do_save_count = 1 if not shift_pressed else 10 + elif chr_key == 'w': + do_skip_move_count = 1 if not shift_pressed else 10 + elif chr_key == 'x': + do_skip_count = 1 if not shift_pressed else 10 + + if do_prev_count > 0: + do_prev_count -= 1 + if len(done_paths) > 0: + if filepath is not None: + image_paths.insert(0, filepath) + + filepath = done_paths.pop(-1) + done_images_types[filepath.name] = 0 + + if filepath.parent != input_path: + new_filename_path = input_path / filepath.name + filepath.rename ( new_filename_path ) + image_paths.insert(0, new_filename_path) + else: + image_paths.insert(0, filepath) + + next = True + elif filepath is not None: + if do_save_move_count > 0: + do_save_move_count -= 1 + + ed.mask_finish() + dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) + + done_paths += [ confirmed_path / filepath.name ] + done_images_types[filepath.name] = 2 + filepath.rename(done_paths[-1]) + + next = True + elif do_save_count > 0: + do_save_count -= 1 + + ed.mask_finish() + dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) + + done_paths += [ filepath ] + done_images_types[filepath.name] = 2 + + next = True + elif do_skip_move_count > 0: + do_skip_move_count -= 1 + + done_paths += [ skipped_path / filepath.name ] + done_images_types[filepath.name] = 1 + filepath.rename(done_paths[-1]) + + next = True + elif do_skip_count > 0: + do_skip_count -= 1 + + done_paths += [ filepath ] + done_images_types[filepath.name] = 1 + + next = True + else: + do_save_move_count = do_save_count = do_skip_move_count = do_skip_count = 0 + + if jobs_count() == 0: + if ed.switch_screen_changed(): + screen = ed.make_screen() + if zoom_factor != 1.0: + h,w,c = screen.shape + screen = cv2.resize ( screen, ( int(w*zoom_factor), int(h*zoom_factor) ) ) + io.show_image (wnd_name, screen ) + + + io.process_messages(0.005) + + io.destroy_all_windows() + diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py index 
9ff2588..f83ab70 100644 --- a/mainscripts/Sorter.py +++ b/mainscripts/Sorter.py @@ -1,803 +1,803 @@ -import os -import sys -import operator -import numpy as np -import cv2 -from shutil import copyfile -from pathlib import Path -from utils import Path_utils -from utils.DFLPNG import DFLPNG -from utils.DFLJPG import DFLJPG -from utils.cv2_utils import * -from facelib import LandmarksProcessor -from joblib import Subprocessor -import multiprocessing -from interact import interact as io -from imagelib import estimate_sharpness - -class BlurEstimatorSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.log_info('Running on %s.' % (client_dict['device_name']) ) - - #override - def process_data(self, data): - filepath = Path( data[0] ) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is not None: - image = cv2_imread( str(filepath) ) - return [ str(filepath), estimate_sharpness(image) ] - else: - self.log_err ("%s is not a dfl image file" % (filepath.name) ) - return [ str(filepath), 0 ] - - #override - def get_data_name (self, data): - #return string identificator of your data - return data[0] - - #override - def __init__(self, input_data ): - self.input_data = input_data - self.img_list = [] - self.trash_img_list = [] - super().__init__('BlurEstimator', BlurEstimatorSubprocessor.Cli, 60) - - #override - def on_clients_initialized(self): - io.progress_bar ("", len (self.input_data)) - - #override - def on_clients_finalized(self): - io.progress_bar_close () - - #override - def process_info_generator(self): - for i in range(0, multiprocessing.cpu_count() ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - } - - #override - def get_data(self, host_dict): - if len (self.input_data) > 0: - return self.input_data.pop(0) - - return None - - #override - def on_data_return (self, host_dict, data): - self.input_data.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - if result[1] == 0: - self.trash_img_list.append ( result ) - else: - self.img_list.append ( result ) - - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.img_list, self.trash_img_list - - -def sort_by_blur(input_path): - io.log_info ("Sorting by blur...") - - img_list = [ (filename,[]) for filename in Path_utils.get_image_paths(input_path) ] - img_list, trash_img_list = BlurEstimatorSubprocessor (img_list).run() - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - return img_list, trash_img_list - -def sort_by_face(input_path): - io.log_info ("Sorting by face similarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks()] ) - - - img_list_len = len(img_list) - for i in io.progress_bar_generator ( range(0, img_list_len-1), "Sorting"): - min_score = float("inf") - j_min_score = i+1 - for j in range(i+1,len(img_list)): - - fl1 
= img_list[i][1] - fl2 = img_list[j][1] - score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - if score < min_score: - min_score = score - j_min_score = j - img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] - - return img_list, trash_img_list - -def sort_by_face_dissim(input_path): - - io.log_info ("Sorting by face dissimilarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks(), 0 ] ) - - img_list_len = len(img_list) - for i in io.progress_bar_generator( range(img_list_len-1), "Sorting"): - score_total = 0 - for j in range(i+1,len(img_list)): - if i == j: - continue - fl1 = img_list[i][1] - fl2 = img_list[j][1] - score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - img_list[i][2] = score_total - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) - - return img_list, trash_img_list - -def sort_by_face_yaw(input_path): - io.log_info ("Sorting by face yaw...") - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - pitch_yaw_roll = dflimg.get_pitch_yaw_roll() - if pitch_yaw_roll is not None: - pitch, yaw, roll = pitch_yaw_roll - else: - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) - - img_list.append( [str(filepath), yaw ] ) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - return img_list, trash_img_list - -def sort_by_face_pitch(input_path): - io.log_info ("Sorting by face pitch...") - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - pitch_yaw_roll = dflimg.get_pitch_yaw_roll() - if pitch_yaw_roll is not None: - pitch, yaw, roll = pitch_yaw_roll - else: - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) - - img_list.append( [str(filepath), pitch ] ) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - return img_list, trash_img_list - -class HistSsimSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - - #override - def process_data(self, data): - img_list = [] - for x in data: - img = cv2_imread(x) - img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), - cv2.calcHist([img], [1], None, [256], [0, 256]), - cv2.calcHist([img], [2], None, [256], [0, 256]) - ]) - - img_list_len = len(img_list) - for i in range(img_list_len-1): - min_score = float("inf") - j_min_score = i+1 - for j in range(i+1,len(img_list)): - score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ - cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ - cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) - if score < min_score: - min_score = score - j_min_score = j - img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] - - self.progress_bar_inc(1) - - return img_list - - #override - def get_data_name (self, data): - return "Bunch of images" - - #override - def __init__(self, img_list ): - self.img_list = img_list - self.img_list_len = len(img_list) - - slice_count = 20000 - sliced_count = self.img_list_len // slice_count - - if sliced_count > 12: - sliced_count = 11.9 - slice_count = int(self.img_list_len / sliced_count) - sliced_count = self.img_list_len // slice_count - - self.img_chunks_list = [ self.img_list[i*slice_count : (i+1)*slice_count] for i in range(sliced_count) ] + \ - [ self.img_list[sliced_count*slice_count:] ] - - self.result = [] - super().__init__('HistSsim', HistSsimSubprocessor.Cli, 0) - - #override - def process_info_generator(self): - for i in range( len(self.img_chunks_list) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } - #override - def on_clients_initialized(self): - io.progress_bar ("Sorting", len(self.img_list)) - io.progress_bar_inc(len(self.img_chunks_list)) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def get_data(self, host_dict): - if len (self.img_chunks_list) > 0: - return self.img_chunks_list.pop(0) - return None - - #override - def on_data_return (self, host_dict, data): - raise Exception("Fail to process data. Decrease number of images and try again.") - - #override - def on_result (self, host_dict, data, result): - self.result += result - return 0 - - #override - def get_result(self): - return self.result - -def sort_by_hist(input_path): - io.log_info ("Sorting by histogram similarity...") - img_list = HistSsimSubprocessor(Path_utils.get_image_paths(input_path)).run() - return img_list - -class HistDissimSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - self.img_list = client_dict['img_list'] - self.img_list_len = len(self.img_list) - - #override - def process_data(self, data): - i = data[0] - score_total = 0 - for j in range( 0, self.img_list_len): - if i == j: - continue - score_total += cv2.compareHist(self.img_list[i][1], self.img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) - - return score_total - - #override - def get_data_name (self, data): - #return string identificator of your data - return self.img_list[data[0]][0] - - #override - def __init__(self, img_list ): - self.img_list = img_list - self.img_list_range = [i for i in range(0, len(img_list) )] - self.result = [] - super().__init__('HistDissim', HistDissimSubprocessor.Cli, 60) - - #override - def on_clients_initialized(self): - io.progress_bar ("Sorting", len (self.img_list) ) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'img_list' : self.img_list - } - #override - def get_data(self, host_dict): - if len (self.img_list_range) > 0: - return [self.img_list_range.pop(0)] - - return None - - #override - def on_data_return (self, host_dict, data): - self.img_list_range.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - self.img_list[data[0]][2] = result - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.img_list - -def sort_by_hist_dissim(input_path): - io.log_info ("Sorting by histogram dissimilarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ([str(filepath)]) - continue - - image = cv2_imread(str(filepath)) - face_mask = LandmarksProcessor.get_image_hull_mask (image.shape, dflimg.get_landmarks()) - image = (image*face_mask).astype(np.uint8) - - img_list.append ([str(filepath), cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, [256], [0, 256]), 0 ]) - - img_list = HistDissimSubprocessor(img_list).run() - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) - - return img_list, trash_img_list - -def sort_by_brightness(input_path): - io.log_info ("Sorting by brightness...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list - -def sort_by_hue(input_path): - io.log_info ("Sorting by hue...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list - -def sort_by_black(input_path): - io.log_info ("Sorting by amount of black pixels...") - - img_list = [] - for x in io.progress_bar_generator( 
Path_utils.get_image_paths(input_path), "Loading"): - img = cv2_imread(x) - img_list.append ([x, img[(img == 0)].size ]) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=False) - - return img_list - -def sort_by_origname(input_path): - io.log_info ("Sort by original filename...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_source_filename()] ) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1)) - return img_list, trash_img_list - -def sort_by_oneface_in_image(input_path): - io.log_info ("Sort by one face in images...") - image_paths = Path_utils.get_image_paths(input_path) - a = np.array ([ ( int(x[0]), int(x[1]) ) \ - for x in [ Path(filepath).stem.split('_') for filepath in image_paths ] if len(x) == 2 - ]) - if len(a) > 0: - idxs = np.ndarray.flatten ( np.argwhere ( a[:,1] != 0 ) ) - idxs = np.unique ( a[idxs][:,0] ) - idxs = np.ndarray.flatten ( np.argwhere ( np.array([ x[0] in idxs for x in a ]) == True ) ) - if len(idxs) > 0: - io.log_info ("Found %d images." % (len(idxs)) ) - img_list = [ (path,) for i,path in enumerate(image_paths) if i not in idxs ] - trash_img_list = [ (image_paths[x],) for x in idxs ] - return img_list, trash_img_list - return [], [] - -class FinalLoaderSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - self.include_by_blur = client_dict['include_by_blur'] - - #override - def process_data(self, data): - filepath = Path(data[0]) - - try: - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - self.log_err("%s is not a dfl image file" % (filepath.name)) - return [ 1, [str(filepath)] ] - - bgr = cv2_imread(str(filepath)) - if bgr is None: - raise Exception ("Unable to load %s" % (filepath.name) ) - - gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) - sharpness = estimate_sharpness(gray) if self.include_by_blur else 0 - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) - - hist = cv2.calcHist([gray], [0], None, [256], [0, 256]) - except Exception as e: - self.log_err (e) - return [ 1, [str(filepath)] ] - - return [ 0, [str(filepath), sharpness, hist, yaw ] ] - - #override - def get_data_name (self, data): - #return string identificator of your data - return data[0] - - #override - def __init__(self, img_list, include_by_blur ): - self.img_list = img_list - - self.include_by_blur = include_by_blur - self.result = [] - self.result_trash = [] - - super().__init__('FinalLoader', FinalLoaderSubprocessor.Cli, 60) - - #override - def on_clients_initialized(self): - io.progress_bar ("Loading", len (self.img_list)) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'include_by_blur': self.include_by_blur - } - - #override - def get_data(self, host_dict): - if len (self.img_list) > 0: - return [self.img_list.pop(0)] - - return None - - #override - def on_data_return (self, host_dict, data): - self.img_list.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - if result[0] == 0: - self.result.append (result[1]) - else: - self.result_trash.append (result[1]) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result, self.result_trash - -class FinalHistDissimSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - - #override - def process_data(self, data): - idx, img_list = data - for i in range( len(img_list) ): - score_total = 0 - for j in range( len(img_list) ): - if i == j: - continue - score_total += cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) - img_list[i][3] = score_total - img_list = sorted(img_list, key=operator.itemgetter(3), reverse=True) - return idx, img_list - - #override - def get_data_name (self, data): - return "Bunch of images" - - #override - def __init__(self, yaws_sample_list ): - self.yaws_sample_list = yaws_sample_list - self.yaws_sample_list_len = len(yaws_sample_list) - - self.yaws_sample_list_idxs = [ i for i in range(self.yaws_sample_list_len) if self.yaws_sample_list[i] is not None ] - self.result = [ None for _ in range(self.yaws_sample_list_len) ] - super().__init__('FinalHistDissimSubprocessor', FinalHistDissimSubprocessor.Cli) - - #override - def process_info_generator(self): - for i in range(min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } - #override - def on_clients_initialized(self): - io.progress_bar ("Sort by hist-dissim", self.yaws_sample_list_len) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def get_data(self, host_dict): - if len (self.yaws_sample_list_idxs) > 0: - idx = self.yaws_sample_list_idxs.pop(0) - - return idx, self.yaws_sample_list[idx] - return None - - #override - def on_data_return (self, host_dict, data): - self.yaws_sample_list_idxs.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - idx, yaws_sample_list = data - self.result[idx] = yaws_sample_list - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result - -def sort_final(input_path, include_by_blur=True): - io.log_info ("Performing final sort.") - - target_count = io.input_int ("Target number of images? 
(default:2000) : ", 2000) - - img_list, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path), include_by_blur ).run() - final_img_list = [] - - grads = 128 - imgs_per_grad = round (target_count / grads) - - grads_space = np.linspace (-1.0,1.0,grads) - - yaws_sample_list = [None]*grads - for g in io.progress_bar_generator ( range(grads), "Sort by yaw"): - yaw = grads_space[g] - next_yaw = grads_space[g+1] if g < grads-1 else yaw - - yaw_samples = [] - for img in img_list: - s_yaw = -img[3] - if (g == 0 and s_yaw < next_yaw) or \ - (g < grads-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ - (g == grads-1 and s_yaw >= yaw): - yaw_samples += [ img ] - if len(yaw_samples) > 0: - yaws_sample_list[g] = yaw_samples - - total_lack = 0 - for g in io.progress_bar_generator ( range(grads), ""): - img_list = yaws_sample_list[g] - img_list_len = len(img_list) if img_list is not None else 0 - - lack = imgs_per_grad - img_list_len - total_lack += max(lack, 0) - - imgs_per_grad += total_lack // grads - - if include_by_blur: - sharpned_imgs_per_grad = imgs_per_grad*10 - for g in io.progress_bar_generator ( range (grads), "Sort by blur"): - img_list = yaws_sample_list[g] - if img_list is None: - continue - - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - if len(img_list) > sharpned_imgs_per_grad: - trash_img_list += img_list[sharpned_imgs_per_grad:] - img_list = img_list[0:sharpned_imgs_per_grad] - - yaws_sample_list[g] = img_list - - yaws_sample_list = FinalHistDissimSubprocessor(yaws_sample_list).run() - - for g in io.progress_bar_generator ( range (grads), "Fetching best"): - img_list = yaws_sample_list[g] - if img_list is None: - continue - - final_img_list += img_list[0:imgs_per_grad] - trash_img_list += img_list[imgs_per_grad:] - - return final_img_list, trash_img_list - -def final_process(input_path, img_list, trash_img_list): - if len(trash_img_list) != 0: - parent_input_path = input_path.parent - trash_path = parent_input_path / (input_path.stem + '_trash') - trash_path.mkdir (exist_ok=True) - - io.log_info ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) ) - - for filename in Path_utils.get_image_paths(trash_path): - Path(filename).unlink() - - for i in io.progress_bar_generator( range(len(trash_img_list)), "Moving trash", leave=False): - src = Path (trash_img_list[i][0]) - dst = trash_path / src.name - try: - src.rename (dst) - except: - io.log_info ('fail to trashing %s' % (src.name) ) - - io.log_info ("") - - if len(img_list) != 0: - for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming", leave=False): - src = Path (img_list[i][0]) - dst = input_path / ('%.5d_%s' % (i, src.name )) - try: - src.rename (dst) - except: - io.log_info ('fail to rename %s' % (src.name) ) - - for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming"): - src = Path (img_list[i][0]) - src = input_path / ('%.5d_%s' % (i, src.name)) - dst = input_path / ('%.5d%s' % (i, src.suffix)) - try: - src.rename (dst) - except: - io.log_info ('fail to rename %s' % (src.name) ) - - - -def main (input_path, sort_by_method): - input_path = Path(input_path) - sort_by_method = sort_by_method.lower() - - io.log_info ("Running sort tool.\r\n") - - img_list = [] - trash_img_list = [] - if sort_by_method == 'blur': img_list, trash_img_list = sort_by_blur (input_path) - elif sort_by_method == 'face': img_list, trash_img_list = sort_by_face (input_path) - elif sort_by_method == 'face-dissim': img_list, trash_img_list = sort_by_face_dissim 
(input_path) - elif sort_by_method == 'face-yaw': img_list, trash_img_list = sort_by_face_yaw (input_path) - elif sort_by_method == 'face-pitch': img_list, trash_img_list = sort_by_face_pitch (input_path) - elif sort_by_method == 'hist': img_list = sort_by_hist (input_path) - elif sort_by_method == 'hist-dissim': img_list, trash_img_list = sort_by_hist_dissim (input_path) - elif sort_by_method == 'brightness': img_list = sort_by_brightness (input_path) - elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) - elif sort_by_method == 'black': img_list = sort_by_black (input_path) - elif sort_by_method == 'origname': img_list, trash_img_list = sort_by_origname (input_path) - elif sort_by_method == 'oneface': img_list, trash_img_list = sort_by_oneface_in_image (input_path) - elif sort_by_method == 'final': img_list, trash_img_list = sort_final (input_path) - elif sort_by_method == 'final-no-blur': img_list, trash_img_list = sort_final (input_path, include_by_blur=False) - - final_process (input_path, img_list, trash_img_list) +import os +import sys +import operator +import numpy as np +import cv2 +from shutil import copyfile +from pathlib import Path +from utils import Path_utils +from utils.DFLPNG import DFLPNG +from utils.DFLJPG import DFLJPG +from utils.cv2_utils import * +from facelib import LandmarksProcessor +from joblib import Subprocessor +import multiprocessing +from interact import interact as io +from imagelib import estimate_sharpness + +class BlurEstimatorSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + self.log_info('Running on %s.' % (client_dict['device_name']) ) + + #override + def process_data(self, data): + filepath = Path( data[0] ) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is not None: + image = cv2_imread( str(filepath) ) + return [ str(filepath), estimate_sharpness(image) ] + else: + self.log_err ("%s is not a dfl image file" % (filepath.name) ) + return [ str(filepath), 0 ] + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[0] + + #override + def __init__(self, input_data ): + self.input_data = input_data + self.img_list = [] + self.trash_img_list = [] + super().__init__('BlurEstimator', BlurEstimatorSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("", len (self.input_data)) + + #override + def on_clients_finalized(self): + io.progress_bar_close () + + #override + def process_info_generator(self): + for i in range(0, multiprocessing.cpu_count() ): + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + } + + #override + def get_data(self, host_dict): + if len (self.input_data) > 0: + return self.input_data.pop(0) + + return None + + #override + def on_data_return (self, host_dict, data): + self.input_data.insert(0, data) + + #override + def on_result (self, host_dict, data, result): + if result[1] == 0: + self.trash_img_list.append ( result ) + else: + self.img_list.append ( result ) + + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.img_list, self.trash_img_list + + +def sort_by_blur(input_path): + io.log_info ("Sorting by blur...") + + img_list = [ (filename,[]) for filename in Path_utils.get_image_paths(input_path) ] + img_list, trash_img_list = BlurEstimatorSubprocessor (img_list).run() + + io.log_info 
("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list, trash_img_list + +def sort_by_face(input_path): + io.log_info ("Sorting by face similarity...") + + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + img_list.append( [str(filepath), dflimg.get_landmarks()] ) + + + img_list_len = len(img_list) + for i in io.progress_bar_generator ( range(0, img_list_len-1), "Sorting"): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + + fl1 = img_list[i][1] + fl2 = img_list[j][1] + score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) + + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + return img_list, trash_img_list + +def sort_by_face_dissim(input_path): + + io.log_info ("Sorting by face dissimilarity...") + + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + img_list.append( [str(filepath), dflimg.get_landmarks(), 0 ] ) + + img_list_len = len(img_list) + for i in io.progress_bar_generator( range(img_list_len-1), "Sorting"): + score_total = 0 + for j in range(i+1,len(img_list)): + if i == j: + continue + fl1 = img_list[i][1] + fl2 = img_list[j][1] + score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) + + img_list[i][2] = score_total + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) + + return img_list, trash_img_list + +def sort_by_face_yaw(input_path): + io.log_info ("Sorting by face yaw...") + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + pitch_yaw_roll = dflimg.get_pitch_yaw_roll() + if pitch_yaw_roll is not None: + pitch, yaw, roll = pitch_yaw_roll + else: + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) + + img_list.append( [str(filepath), yaw ] ) + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list, trash_img_list + +def sort_by_face_pitch(input_path): + io.log_info ("Sorting by face pitch...") + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if 
filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + pitch_yaw_roll = dflimg.get_pitch_yaw_roll() + if pitch_yaw_roll is not None: + pitch, yaw, roll = pitch_yaw_roll + else: + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) + + img_list.append( [str(filepath), pitch ] ) + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list, trash_img_list + +class HistSsimSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.log_info ('Running on %s.' % (client_dict['device_name']) ) + + #override + def process_data(self, data): + img_list = [] + for x in data: + img = cv2_imread(x) + img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), + cv2.calcHist([img], [1], None, [256], [0, 256]), + cv2.calcHist([img], [2], None, [256], [0, 256]) + ]) + + img_list_len = len(img_list) + for i in range(img_list_len-1): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + self.progress_bar_inc(1) + + return img_list + + #override + def get_data_name (self, data): + return "Bunch of images" + + #override + def __init__(self, img_list ): + self.img_list = img_list + self.img_list_len = len(img_list) + + slice_count = 20000 + sliced_count = self.img_list_len // slice_count + + if sliced_count > 12: + sliced_count = 11.9 + slice_count = int(self.img_list_len / sliced_count) + sliced_count = self.img_list_len // slice_count + + self.img_chunks_list = [ self.img_list[i*slice_count : (i+1)*slice_count] for i in range(sliced_count) ] + \ + [ self.img_list[sliced_count*slice_count:] ] + + self.result = [] + super().__init__('HistSsim', HistSsimSubprocessor.Cli, 0) + + #override + def process_info_generator(self): + for i in range( len(self.img_chunks_list) ): + yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, + 'device_name': 'CPU%d' % (i) + } + #override + def on_clients_initialized(self): + io.progress_bar ("Sorting", len(self.img_list)) + io.progress_bar_inc(len(self.img_chunks_list)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def get_data(self, host_dict): + if len (self.img_chunks_list) > 0: + return self.img_chunks_list.pop(0) + return None + + #override + def on_data_return (self, host_dict, data): + raise Exception("Fail to process data. 
Decrease number of images and try again.")
+
+    #override
+    def on_result (self, host_dict, data, result):
+        self.result += result
+        return 0
+
+    #override
+    def get_result(self):
+        return self.result
+
+def sort_by_hist(input_path):
+    io.log_info ("Sorting by histogram similarity...")
+    img_list = HistSsimSubprocessor(Path_utils.get_image_paths(input_path)).run()
+    return img_list
+
+class HistDissimSubprocessor(Subprocessor):
+    class Cli(Subprocessor.Cli):
+        #override
+        def on_initialize(self, client_dict):
+            self.log_info ('Running on %s.' % (client_dict['device_name']) )
+            self.img_list = client_dict['img_list']
+            self.img_list_len = len(self.img_list)
+
+        #override
+        def process_data(self, data):
+            i = data[0]
+            score_total = 0
+            for j in range( 0, self.img_list_len):
+                if i == j:
+                    continue
+                score_total += cv2.compareHist(self.img_list[i][1], self.img_list[j][1], cv2.HISTCMP_BHATTACHARYYA)
+
+            return score_total
+
+        #override
+        def get_data_name (self, data):
+            # return a string identifier for this data
+            return self.img_list[data[0]][0]
+
+    #override
+    def __init__(self, img_list ):
+        self.img_list = img_list
+        self.img_list_range = [i for i in range(0, len(img_list) )]
+        self.result = []
+        super().__init__('HistDissim', HistDissimSubprocessor.Cli, 60)
+
+    #override
+    def on_clients_initialized(self):
+        io.progress_bar ("Sorting", len (self.img_list) )
+
+    #override
+    def on_clients_finalized(self):
+        io.progress_bar_close()
+
+    #override
+    def process_info_generator(self):
+        for i in range(0, min(multiprocessing.cpu_count(), 8) ):
+            yield 'CPU%d' % (i), {}, {'device_idx': i,
+                                      'device_name': 'CPU%d' % (i),
+                                      'img_list' : self.img_list
+                                      }
+    #override
+    def get_data(self, host_dict):
+        if len (self.img_list_range) > 0:
+            return [self.img_list_range.pop(0)]
+
+        return None
+
+    #override
+    def on_data_return (self, host_dict, data):
+        self.img_list_range.insert(0, data[0])
+
+    #override
+    def on_result (self, host_dict, data, result):
+        self.img_list[data[0]][2] = result
+        io.progress_bar_inc(1)
+
+    #override
+    def get_result(self):
+        return self.img_list
+
+def sort_by_hist_dissim(input_path):
+    io.log_info ("Sorting by histogram dissimilarity...")
+
+    img_list = []
+    trash_img_list = []
+    for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"):
+        filepath = Path(filepath)
+
+        if filepath.suffix == '.png':
+            dflimg = DFLPNG.load( str(filepath) )
+        elif filepath.suffix == '.jpg':
+            dflimg = DFLJPG.load ( str(filepath) )
+        else:
+            dflimg = None
+
+        if dflimg is None:
+            io.log_err ("%s is not a dfl image file" % (filepath.name) )
+            trash_img_list.append ([str(filepath)])
+            continue
+
+        # score histograms on the face region only: the background is masked out
+        image = cv2_imread(str(filepath))
+        face_mask = LandmarksProcessor.get_image_hull_mask (image.shape, dflimg.get_landmarks())
+        image = (image*face_mask).astype(np.uint8)
+
+        img_list.append ([str(filepath), cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, [256], [0, 256]), 0 ])
+
+    img_list = HistDissimSubprocessor(img_list).run()
+
+    io.log_info ("Sorting...")
+    img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True)
+
+    return img_list, trash_img_list
+
+def sort_by_brightness(input_path):
+    io.log_info ("Sorting by brightness...")
+    img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ]
+    io.log_info ("Sorting...")
+    img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
+    return img_list
+
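+# A minimal usage sketch for the sort_by_* helpers in this module
+# ('workspace/aligned' below is a hypothetical path). Each helper takes the
+# input directory and returns an ordered img_list of [filepath, sort_key, ...]
+# rows (most also return a trash_img_list); final_process(), defined further
+# below, then renames the files to sequential '%.5d' names in that order:
+#
+#   aligned_path = Path('workspace/aligned')
+#   img_list = sort_by_brightness(aligned_path)   # brightest faces first
+#   final_process(aligned_path, img_list, [])     # nothing to trash
+#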
+def sort_by_hue(input_path):
+    io.log_info ("Sorting by hue...")
+    img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ]
+    io.log_info ("Sorting...")
+    img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
+    return img_list
+
+def sort_by_black(input_path):
+    io.log_info ("Sorting by amount of black pixels...")
+
+    img_list = []
+    for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"):
+        img = cv2_imread(x)
+        img_list.append ([x, img[(img == 0)].size ])
+
+    io.log_info ("Sorting...")
+    img_list = sorted(img_list, key=operator.itemgetter(1), reverse=False)
+
+    return img_list
+
+def sort_by_origname(input_path):
+    io.log_info ("Sorting by original filename...")
+
+    img_list = []
+    trash_img_list = []
+    for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"):
+        filepath = Path(filepath)
+
+        if filepath.suffix == '.png':
+            dflimg = DFLPNG.load( str(filepath) )
+        elif filepath.suffix == '.jpg':
+            dflimg = DFLJPG.load( str(filepath) )
+        else:
+            dflimg = None
+
+        if dflimg is None:
+            io.log_err ("%s is not a dfl image file" % (filepath.name) )
+            trash_img_list.append( [str(filepath)] )
+            continue
+
+        img_list.append( [str(filepath), dflimg.get_source_filename()] )
+
+    io.log_info ("Sorting...")
+    img_list = sorted(img_list, key=operator.itemgetter(1))
+    return img_list, trash_img_list
+
+def sort_by_oneface_in_image(input_path):
+    io.log_info ("Sorting by one face per image...")
+    image_paths = Path_utils.get_image_paths(input_path)
+    a = np.array ([ ( int(x[0]), int(x[1]) ) \
+                    for x in [ Path(filepath).stem.split('_') for filepath in image_paths ] if len(x) == 2
+                  ])
+    if len(a) > 0:
+        idxs = np.ndarray.flatten ( np.argwhere ( a[:,1] != 0 ) )
+        idxs = np.unique ( a[idxs][:,0] )
+        idxs = np.ndarray.flatten ( np.argwhere ( np.array([ x[0] in idxs for x in a ]) == True ) )
+        if len(idxs) > 0:
+            io.log_info ("Found %d images." % (len(idxs)) )
+            img_list = [ (path,) for i,path in enumerate(image_paths) if i not in idxs ]
+            trash_img_list = [ (image_paths[x],) for x in idxs ]
+            return img_list, trash_img_list
+    return [], []
+
+class FinalLoaderSubprocessor(Subprocessor):
+    class Cli(Subprocessor.Cli):
+        #override
+        def on_initialize(self, client_dict):
+            self.log_info ('Running on %s.'
% (client_dict['device_name']) ) + self.include_by_blur = client_dict['include_by_blur'] + + #override + def process_data(self, data): + filepath = Path(data[0]) + + try: + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + self.log_err("%s is not a dfl image file" % (filepath.name)) + return [ 1, [str(filepath)] ] + + bgr = cv2_imread(str(filepath)) + if bgr is None: + raise Exception ("Unable to load %s" % (filepath.name) ) + + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) + sharpness = estimate_sharpness(gray) if self.include_by_blur else 0 + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) + + hist = cv2.calcHist([gray], [0], None, [256], [0, 256]) + except Exception as e: + self.log_err (e) + return [ 1, [str(filepath)] ] + + return [ 0, [str(filepath), sharpness, hist, yaw ] ] + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[0] + + #override + def __init__(self, img_list, include_by_blur ): + self.img_list = img_list + + self.include_by_blur = include_by_blur + self.result = [] + self.result_trash = [] + + super().__init__('FinalLoader', FinalLoaderSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("Loading", len (self.img_list)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def process_info_generator(self): + for i in range(0, min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + 'include_by_blur': self.include_by_blur + } + + #override + def get_data(self, host_dict): + if len (self.img_list) > 0: + return [self.img_list.pop(0)] + + return None + + #override + def on_data_return (self, host_dict, data): + self.img_list.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + if result[0] == 0: + self.result.append (result[1]) + else: + self.result_trash.append (result[1]) + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result, self.result_trash + +class FinalHistDissimSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) + + #override + def process_data(self, data): + idx, img_list = data + for i in range( len(img_list) ): + score_total = 0 + for j in range( len(img_list) ): + if i == j: + continue + score_total += cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + img_list[i][3] = score_total + img_list = sorted(img_list, key=operator.itemgetter(3), reverse=True) + return idx, img_list + + #override + def get_data_name (self, data): + return "Bunch of images" + + #override + def __init__(self, yaws_sample_list ): + self.yaws_sample_list = yaws_sample_list + self.yaws_sample_list_len = len(yaws_sample_list) + + self.yaws_sample_list_idxs = [ i for i in range(self.yaws_sample_list_len) if self.yaws_sample_list[i] is not None ] + self.result = [ None for _ in range(self.yaws_sample_list_len) ] + super().__init__('FinalHistDissimSubprocessor', FinalHistDissimSubprocessor.Cli) + + #override + def process_info_generator(self): + for i in range(min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, + 'device_name': 'CPU%d' % (i) + } + #override + def on_clients_initialized(self): + io.progress_bar ("Sort by hist-dissim", self.yaws_sample_list_len) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def get_data(self, host_dict): + if len (self.yaws_sample_list_idxs) > 0: + idx = self.yaws_sample_list_idxs.pop(0) + + return idx, self.yaws_sample_list[idx] + return None + + #override + def on_data_return (self, host_dict, data): + self.yaws_sample_list_idxs.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + idx, yaws_sample_list = data + self.result[idx] = yaws_sample_list + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result + +def sort_final(input_path, include_by_blur=True): + io.log_info ("Performing final sort.") + + target_count = io.input_int ("Target number of images? 
(default:2000) : ", 2000)
+
+    img_list, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path), include_by_blur ).run()
+    final_img_list = []
+
+    grads = 128
+    imgs_per_grad = round (target_count / grads)
+
+    grads_space = np.linspace (-1.0,1.0,grads)
+
+    yaws_sample_list = [None]*grads
+    for g in io.progress_bar_generator ( range(grads), "Sort by yaw"):
+        yaw = grads_space[g]
+        next_yaw = grads_space[g+1] if g < grads-1 else yaw
+
+        yaw_samples = []
+        for img in img_list:
+            s_yaw = -img[3]
+            if (g == 0 and s_yaw < next_yaw) or \
+               (g < grads-1 and s_yaw >= yaw and s_yaw < next_yaw) or \
+               (g == grads-1 and s_yaw >= yaw):
+                yaw_samples += [ img ]
+        if len(yaw_samples) > 0:
+            yaws_sample_list[g] = yaw_samples
+
+    # redistribute the shortfall of underpopulated yaw bins across all bins
+    total_lack = 0
+    for g in io.progress_bar_generator ( range(grads), ""):
+        img_list = yaws_sample_list[g]
+        img_list_len = len(img_list) if img_list is not None else 0
+
+        lack = imgs_per_grad - img_list_len
+        total_lack += max(lack, 0)
+
+    imgs_per_grad += total_lack // grads
+
+    if include_by_blur:
+        sharpened_imgs_per_grad = imgs_per_grad*10
+        for g in io.progress_bar_generator ( range (grads), "Sort by blur"):
+            img_list = yaws_sample_list[g]
+            if img_list is None:
+                continue
+
+            img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
+
+            if len(img_list) > sharpened_imgs_per_grad:
+                trash_img_list += img_list[sharpened_imgs_per_grad:]
+                img_list = img_list[0:sharpened_imgs_per_grad]
+
+            yaws_sample_list[g] = img_list
+
+    yaws_sample_list = FinalHistDissimSubprocessor(yaws_sample_list).run()
+
+    for g in io.progress_bar_generator ( range (grads), "Fetching best"):
+        img_list = yaws_sample_list[g]
+        if img_list is None:
+            continue
+
+        final_img_list += img_list[0:imgs_per_grad]
+        trash_img_list += img_list[imgs_per_grad:]
+
+    return final_img_list, trash_img_list
+
+def final_process(input_path, img_list, trash_img_list):
+    if len(trash_img_list) != 0:
+        parent_input_path = input_path.parent
+        trash_path = parent_input_path / (input_path.stem + '_trash')
+        trash_path.mkdir (exist_ok=True)
+
+        io.log_info ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) )
+
+        for filename in Path_utils.get_image_paths(trash_path):
+            Path(filename).unlink()
+
+        for i in io.progress_bar_generator( range(len(trash_img_list)), "Moving trash", leave=False):
+            src = Path (trash_img_list[i][0])
+            dst = trash_path / src.name
+            try:
+                src.rename (dst)
+            except:
+                io.log_info ('failed to trash %s' % (src.name) )
+
+        io.log_info ("")
+
+    if len(img_list) != 0:
+        # first pass: prefix each file with its sequential index to avoid name collisions
+        for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming", leave=False):
+            src = Path (img_list[i][0])
+            dst = input_path / ('%.5d_%s' % (i, src.name ))
+            try:
+                src.rename (dst)
+            except:
+                io.log_info ('failed to rename %s' % (src.name) )
+
+        # second pass: drop the original name, keeping only the sequential index
+        for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming"):
+            src = Path (img_list[i][0])
+            src = input_path / ('%.5d_%s' % (i, src.name))
+            dst = input_path / ('%.5d%s' % (i, src.suffix))
+            try:
+                src.rename (dst)
+            except:
+                io.log_info ('failed to rename %s' % (src.name) )
+
+
+
+def main (input_path, sort_by_method):
+    input_path = Path(input_path)
+    sort_by_method = sort_by_method.lower()
+
+    io.log_info ("Running sort tool.\r\n")
+
+    img_list = []
+    trash_img_list = []
+    if sort_by_method == 'blur': img_list, trash_img_list = sort_by_blur (input_path)
+    elif sort_by_method == 'face': img_list, trash_img_list = sort_by_face (input_path)
+    elif sort_by_method == 'face-dissim': img_list, trash_img_list = sort_by_face_dissim
(input_path) + elif sort_by_method == 'face-yaw': img_list, trash_img_list = sort_by_face_yaw (input_path) + elif sort_by_method == 'face-pitch': img_list, trash_img_list = sort_by_face_pitch (input_path) + elif sort_by_method == 'hist': img_list = sort_by_hist (input_path) + elif sort_by_method == 'hist-dissim': img_list, trash_img_list = sort_by_hist_dissim (input_path) + elif sort_by_method == 'brightness': img_list = sort_by_brightness (input_path) + elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) + elif sort_by_method == 'black': img_list = sort_by_black (input_path) + elif sort_by_method == 'origname': img_list, trash_img_list = sort_by_origname (input_path) + elif sort_by_method == 'oneface': img_list, trash_img_list = sort_by_oneface_in_image (input_path) + elif sort_by_method == 'final': img_list, trash_img_list = sort_final (input_path) + elif sort_by_method == 'final-no-blur': img_list, trash_img_list = sort_final (input_path, include_by_blur=False) + + final_process (input_path, img_list, trash_img_list) diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py index f170fcc..71a2ca5 100644 --- a/mainscripts/Trainer.py +++ b/mainscripts/Trainer.py @@ -1,324 +1,324 @@ -import sys -import traceback -import queue -import threading -import time -import numpy as np -import itertools -from pathlib import Path -from utils import Path_utils -import imagelib -import cv2 -import models -from interact import interact as io - -def trainerThread (s2c, c2s, args, device_args): - while True: - try: - start_time = time.time() - - training_data_src_path = Path( args.get('training_data_src_dir', '') ) - training_data_dst_path = Path( args.get('training_data_dst_dir', '') ) - - pretraining_data_path = args.get('pretraining_data_dir', '') - pretraining_data_path = Path(pretraining_data_path) if pretraining_data_path is not None else None - - model_path = Path( args.get('model_path', '') ) - model_name = args.get('model_name', '') - save_interval_min = 15 - debug = args.get('debug', '') - execute_programs = args.get('execute_programs', []) - - if not training_data_src_path.exists(): - io.log_err('Training data src directory does not exist.') - break - - if not training_data_dst_path.exists(): - io.log_err('Training data dst directory does not exist.') - break - - if not model_path.exists(): - model_path.mkdir(exist_ok=True) - - model = models.import_model(model_name)( - model_path, - training_data_src_path=training_data_src_path, - training_data_dst_path=training_data_dst_path, - pretraining_data_path=pretraining_data_path, - debug=debug, - device_args=device_args) - - is_reached_goal = model.is_reached_iter_goal() - - shared_state = { 'after_save' : False } - loss_string = "" - save_iter = model.get_iter() - def model_save(): - if not debug and not is_reached_goal: - io.log_info ("Saving....", end='\r') - model.save() - shared_state['after_save'] = True - - def send_preview(): - if not debug: - previews = model.get_previews() - c2s.put ( {'op':'show', 'previews': previews, 'iter':model.get_iter(), 'loss_history': model.get_loss_history().copy() } ) - else: - previews = [( 'debug, press update for new', model.debug_one_iter())] - c2s.put ( {'op':'show', 'previews': previews} ) - - - if model.is_first_run(): - model_save() - - if model.get_target_iter() != 0: - if is_reached_goal: - io.log_info('Model already trained to target iteration. You can use preview.') - else: - io.log_info('Starting. Target iteration: %d. Press "Enter" to stop training and save model.' 
% ( model.get_target_iter() ) ) - else: - io.log_info('Starting. Press "Enter" to stop training and save model.') - - last_save_time = time.time() - - execute_programs = [ [x[0], x[1], time.time() ] for x in execute_programs ] - - for i in itertools.count(0,1): - if not debug: - cur_time = time.time() - - for x in execute_programs: - prog_time, prog, last_time = x - exec_prog = False - if prog_time > 0 and (cur_time - start_time) >= prog_time: - x[0] = 0 - exec_prog = True - elif prog_time < 0 and (cur_time - last_time) >= -prog_time: - x[2] = cur_time - exec_prog = True - - if exec_prog: - try: - exec(prog) - except Exception as e: - print("Unable to execute program: %s" % (prog) ) - - if not is_reached_goal: - iter, iter_time = model.train_one_iter() - - loss_history = model.get_loss_history() - time_str = time.strftime("[%H:%M:%S]") - if iter_time >= 10: - loss_string = "{0}[#{1:06d}][{2:.5s}s]".format ( time_str, iter, '{:0.4f}'.format(iter_time) ) - else: - loss_string = "{0}[#{1:06d}][{2:04d}ms]".format ( time_str, iter, int(iter_time*1000) ) - - if shared_state['after_save']: - shared_state['after_save'] = False - last_save_time = time.time() #upd last_save_time only after save+one_iter, because plaidML rebuilds programs after save https://github.com/plaidml/plaidml/issues/274 - - mean_loss = np.mean ( [ np.array(loss_history[i]) for i in range(save_iter, iter) ], axis=0) - - for loss_value in mean_loss: - loss_string += "[%.4f]" % (loss_value) - - io.log_info (loss_string) - - save_iter = iter - else: - for loss_value in loss_history[-1]: - loss_string += "[%.4f]" % (loss_value) - - if io.is_colab(): - io.log_info ('\r' + loss_string, end='') - else: - io.log_info (loss_string, end='\r') - - if model.get_target_iter() != 0 and model.is_reached_iter_goal(): - io.log_info ('Reached target iteration.') - model_save() - is_reached_goal = True - io.log_info ('You can use preview now.') - - if not is_reached_goal and (time.time() - last_save_time) >= save_interval_min*60: - model_save() - send_preview() - - if i==0: - if is_reached_goal: - model.pass_one_iter() - send_preview() - - if debug: - time.sleep(0.005) - - while not s2c.empty(): - input = s2c.get() - op = input['op'] - if op == 'save': - model_save() - elif op == 'preview': - if is_reached_goal: - model.pass_one_iter() - send_preview() - elif op == 'close': - model_save() - i = -1 - break - - if i == -1: - break - - - - model.finalize() - - except Exception as e: - print ('Error: %s' % (str(e))) - traceback.print_exc() - break - c2s.put ( {'op':'close'} ) - - - -def main(args, device_args): - io.log_info ("Running trainer.\r\n") - - no_preview = args.get('no_preview', False) - - s2c = queue.Queue() - c2s = queue.Queue() - - thread = threading.Thread(target=trainerThread, args=(s2c, c2s, args, device_args) ) - thread.start() - - if no_preview: - while True: - if not c2s.empty(): - input = c2s.get() - op = input.get('op','') - if op == 'close': - break - try: - io.process_messages(0.1) - except KeyboardInterrupt: - s2c.put ( {'op': 'close'} ) - else: - wnd_name = "Training preview" - io.named_window(wnd_name) - io.capture_keys(wnd_name) - - previews = None - loss_history = None - selected_preview = 0 - update_preview = False - is_showing = False - is_waiting_preview = False - show_last_history_iters_count = 0 - iter = 0 - while True: - if not c2s.empty(): - input = c2s.get() - op = input['op'] - if op == 'show': - is_waiting_preview = False - loss_history = input['loss_history'] if 'loss_history' in input.keys() else None - 
previews = input['previews'] if 'previews' in input.keys() else None - iter = input['iter'] if 'iter' in input.keys() else 0 - if previews is not None: - max_w = 0 - max_h = 0 - for (preview_name, preview_rgb) in previews: - (h, w, c) = preview_rgb.shape - max_h = max (max_h, h) - max_w = max (max_w, w) - - max_size = 800 - if max_h > max_size: - max_w = int( max_w / (max_h / max_size) ) - max_h = max_size - - #make all previews size equal - for preview in previews[:]: - (preview_name, preview_rgb) = preview - (h, w, c) = preview_rgb.shape - if h != max_h or w != max_w: - previews.remove(preview) - previews.append ( (preview_name, cv2.resize(preview_rgb, (max_w, max_h))) ) - selected_preview = selected_preview % len(previews) - update_preview = True - elif op == 'close': - break - - if update_preview: - update_preview = False - - selected_preview_name = previews[selected_preview][0] - selected_preview_rgb = previews[selected_preview][1] - (h,w,c) = selected_preview_rgb.shape - - # HEAD - head_lines = [ - '[s]:save [enter]:exit', - '[p]:update [space]:next preview [l]:change history range', - 'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) ) - ] - head_line_height = 15 - head_height = len(head_lines) * head_line_height - head = np.ones ( (head_height,w,c) ) * 0.1 - - for i in range(0, len(head_lines)): - t = i*head_line_height - b = (i+1)*head_line_height - head[t:b, 0:w] += imagelib.get_text_image ( (head_line_height,w,c) , head_lines[i], color=[0.8]*c ) - - final = head - - if loss_history is not None: - if show_last_history_iters_count == 0: - loss_history_to_show = loss_history - else: - loss_history_to_show = loss_history[-show_last_history_iters_count:] - - lh_img = models.ModelBase.get_loss_history_preview(loss_history_to_show, iter, w, c) - final = np.concatenate ( [final, lh_img], axis=0 ) - - final = np.concatenate ( [final, selected_preview_rgb], axis=0 ) - final = np.clip(final, 0, 1) - - io.show_image( wnd_name, (final*255).astype(np.uint8) ) - is_showing = True - - key_events = io.get_key_events(wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - - if key == ord('\n') or key == ord('\r'): - s2c.put ( {'op': 'close'} ) - elif key == ord('s'): - s2c.put ( {'op': 'save'} ) - elif key == ord('p'): - if not is_waiting_preview: - is_waiting_preview = True - s2c.put ( {'op': 'preview'} ) - elif key == ord('l'): - if show_last_history_iters_count == 0: - show_last_history_iters_count = 5000 - elif show_last_history_iters_count == 5000: - show_last_history_iters_count = 10000 - elif show_last_history_iters_count == 10000: - show_last_history_iters_count = 50000 - elif show_last_history_iters_count == 50000: - show_last_history_iters_count = 100000 - elif show_last_history_iters_count == 100000: - show_last_history_iters_count = 0 - update_preview = True - elif key == ord(' '): - selected_preview = (selected_preview + 1) % len(previews) - update_preview = True - - try: - io.process_messages(0.1) - except KeyboardInterrupt: - s2c.put ( {'op': 'close'} ) - - io.destroy_all_windows() +import sys +import traceback +import queue +import threading +import time +import numpy as np +import itertools +from pathlib import Path +from utils import Path_utils +import imagelib +import cv2 +import models +from interact import interact as io + +def trainerThread (s2c, c2s, args, device_args): + while True: + try: + start_time = time.time() + + training_data_src_path = Path( 
args.get('training_data_src_dir', '') ) + training_data_dst_path = Path( args.get('training_data_dst_dir', '') ) + + pretraining_data_path = args.get('pretraining_data_dir', '') + pretraining_data_path = Path(pretraining_data_path) if pretraining_data_path is not None else None + + model_path = Path( args.get('model_path', '') ) + model_name = args.get('model_name', '') + save_interval_min = 15 + debug = args.get('debug', '') + execute_programs = args.get('execute_programs', []) + + if not training_data_src_path.exists(): + io.log_err('Training data src directory does not exist.') + break + + if not training_data_dst_path.exists(): + io.log_err('Training data dst directory does not exist.') + break + + if not model_path.exists(): + model_path.mkdir(exist_ok=True) + + model = models.import_model(model_name)( + model_path, + training_data_src_path=training_data_src_path, + training_data_dst_path=training_data_dst_path, + pretraining_data_path=pretraining_data_path, + debug=debug, + device_args=device_args) + + is_reached_goal = model.is_reached_iter_goal() + + shared_state = { 'after_save' : False } + loss_string = "" + save_iter = model.get_iter() + def model_save(): + if not debug and not is_reached_goal: + io.log_info ("Saving....", end='\r') + model.save() + shared_state['after_save'] = True + + def send_preview(): + if not debug: + previews = model.get_previews() + c2s.put ( {'op':'show', 'previews': previews, 'iter':model.get_iter(), 'loss_history': model.get_loss_history().copy() } ) + else: + previews = [( 'debug, press update for new', model.debug_one_iter())] + c2s.put ( {'op':'show', 'previews': previews} ) + + + if model.is_first_run(): + model_save() + + if model.get_target_iter() != 0: + if is_reached_goal: + io.log_info('Model already trained to target iteration. You can use preview.') + else: + io.log_info('Starting. Target iteration: %d. Press "Enter" to stop training and save model.' % ( model.get_target_iter() ) ) + else: + io.log_info('Starting. 
Press "Enter" to stop training and save model.') + + last_save_time = time.time() + + execute_programs = [ [x[0], x[1], time.time() ] for x in execute_programs ] + + for i in itertools.count(0,1): + if not debug: + cur_time = time.time() + + for x in execute_programs: + prog_time, prog, last_time = x + exec_prog = False + if prog_time > 0 and (cur_time - start_time) >= prog_time: + x[0] = 0 + exec_prog = True + elif prog_time < 0 and (cur_time - last_time) >= -prog_time: + x[2] = cur_time + exec_prog = True + + if exec_prog: + try: + exec(prog) + except Exception as e: + print("Unable to execute program: %s" % (prog) ) + + if not is_reached_goal: + iter, iter_time = model.train_one_iter() + + loss_history = model.get_loss_history() + time_str = time.strftime("[%H:%M:%S]") + if iter_time >= 10: + loss_string = "{0}[#{1:06d}][{2:.5s}s]".format ( time_str, iter, '{:0.4f}'.format(iter_time) ) + else: + loss_string = "{0}[#{1:06d}][{2:04d}ms]".format ( time_str, iter, int(iter_time*1000) ) + + if shared_state['after_save']: + shared_state['after_save'] = False + last_save_time = time.time() #upd last_save_time only after save+one_iter, because plaidML rebuilds programs after save https://github.com/plaidml/plaidml/issues/274 + + mean_loss = np.mean ( [ np.array(loss_history[i]) for i in range(save_iter, iter) ], axis=0) + + for loss_value in mean_loss: + loss_string += "[%.4f]" % (loss_value) + + io.log_info (loss_string) + + save_iter = iter + else: + for loss_value in loss_history[-1]: + loss_string += "[%.4f]" % (loss_value) + + if io.is_colab(): + io.log_info ('\r' + loss_string, end='') + else: + io.log_info (loss_string, end='\r') + + if model.get_target_iter() != 0 and model.is_reached_iter_goal(): + io.log_info ('Reached target iteration.') + model_save() + is_reached_goal = True + io.log_info ('You can use preview now.') + + if not is_reached_goal and (time.time() - last_save_time) >= save_interval_min*60: + model_save() + send_preview() + + if i==0: + if is_reached_goal: + model.pass_one_iter() + send_preview() + + if debug: + time.sleep(0.005) + + while not s2c.empty(): + input = s2c.get() + op = input['op'] + if op == 'save': + model_save() + elif op == 'preview': + if is_reached_goal: + model.pass_one_iter() + send_preview() + elif op == 'close': + model_save() + i = -1 + break + + if i == -1: + break + + + + model.finalize() + + except Exception as e: + print ('Error: %s' % (str(e))) + traceback.print_exc() + break + c2s.put ( {'op':'close'} ) + + + +def main(args, device_args): + io.log_info ("Running trainer.\r\n") + + no_preview = args.get('no_preview', False) + + s2c = queue.Queue() + c2s = queue.Queue() + + thread = threading.Thread(target=trainerThread, args=(s2c, c2s, args, device_args) ) + thread.start() + + if no_preview: + while True: + if not c2s.empty(): + input = c2s.get() + op = input.get('op','') + if op == 'close': + break + try: + io.process_messages(0.1) + except KeyboardInterrupt: + s2c.put ( {'op': 'close'} ) + else: + wnd_name = "Training preview" + io.named_window(wnd_name) + io.capture_keys(wnd_name) + + previews = None + loss_history = None + selected_preview = 0 + update_preview = False + is_showing = False + is_waiting_preview = False + show_last_history_iters_count = 0 + iter = 0 + while True: + if not c2s.empty(): + input = c2s.get() + op = input['op'] + if op == 'show': + is_waiting_preview = False + loss_history = input['loss_history'] if 'loss_history' in input.keys() else None + previews = input['previews'] if 'previews' in input.keys() else None + 
iter = input['iter'] if 'iter' in input.keys() else 0 + if previews is not None: + max_w = 0 + max_h = 0 + for (preview_name, preview_rgb) in previews: + (h, w, c) = preview_rgb.shape + max_h = max (max_h, h) + max_w = max (max_w, w) + + max_size = 800 + if max_h > max_size: + max_w = int( max_w / (max_h / max_size) ) + max_h = max_size + + #make all previews size equal + for preview in previews[:]: + (preview_name, preview_rgb) = preview + (h, w, c) = preview_rgb.shape + if h != max_h or w != max_w: + previews.remove(preview) + previews.append ( (preview_name, cv2.resize(preview_rgb, (max_w, max_h))) ) + selected_preview = selected_preview % len(previews) + update_preview = True + elif op == 'close': + break + + if update_preview: + update_preview = False + + selected_preview_name = previews[selected_preview][0] + selected_preview_rgb = previews[selected_preview][1] + (h,w,c) = selected_preview_rgb.shape + + # HEAD + head_lines = [ + '[s]:save [enter]:exit', + '[p]:update [space]:next preview [l]:change history range', + 'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) ) + ] + head_line_height = 15 + head_height = len(head_lines) * head_line_height + head = np.ones ( (head_height,w,c) ) * 0.1 + + for i in range(0, len(head_lines)): + t = i*head_line_height + b = (i+1)*head_line_height + head[t:b, 0:w] += imagelib.get_text_image ( (head_line_height,w,c) , head_lines[i], color=[0.8]*c ) + + final = head + + if loss_history is not None: + if show_last_history_iters_count == 0: + loss_history_to_show = loss_history + else: + loss_history_to_show = loss_history[-show_last_history_iters_count:] + + lh_img = models.ModelBase.get_loss_history_preview(loss_history_to_show, iter, w, c) + final = np.concatenate ( [final, lh_img], axis=0 ) + + final = np.concatenate ( [final, selected_preview_rgb], axis=0 ) + final = np.clip(final, 0, 1) + + io.show_image( wnd_name, (final*255).astype(np.uint8) ) + is_showing = True + + key_events = io.get_key_events(wnd_name) + key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) + + if key == ord('\n') or key == ord('\r'): + s2c.put ( {'op': 'close'} ) + elif key == ord('s'): + s2c.put ( {'op': 'save'} ) + elif key == ord('p'): + if not is_waiting_preview: + is_waiting_preview = True + s2c.put ( {'op': 'preview'} ) + elif key == ord('l'): + if show_last_history_iters_count == 0: + show_last_history_iters_count = 5000 + elif show_last_history_iters_count == 5000: + show_last_history_iters_count = 10000 + elif show_last_history_iters_count == 10000: + show_last_history_iters_count = 50000 + elif show_last_history_iters_count == 50000: + show_last_history_iters_count = 100000 + elif show_last_history_iters_count == 100000: + show_last_history_iters_count = 0 + update_preview = True + elif key == ord(' '): + selected_preview = (selected_preview + 1) % len(previews) + update_preview = True + + try: + io.process_messages(0.1) + except KeyboardInterrupt: + s2c.put ( {'op': 'close'} ) + + io.destroy_all_windows() diff --git a/mainscripts/Util.py b/mainscripts/Util.py index 0d3e7c4..1921287 100644 --- a/mainscripts/Util.py +++ b/mainscripts/Util.py @@ -1,156 +1,156 @@ -import cv2 -from pathlib import Path -from utils import Path_utils -from utils.DFLPNG import DFLPNG -from utils.DFLJPG import DFLJPG -from utils.cv2_utils import * -from facelib import LandmarksProcessor -from interact import interact as io - -def remove_fanseg_file (filepath): - filepath = Path(filepath) 
- - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - return - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - return - - dflimg.remove_fanseg_mask() - dflimg.embed_and_set( str(filepath) ) - - -def remove_fanseg_folder(input_path): - input_path = Path(input_path) - - io.log_info ("Removing fanseg mask...\r\n") - - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): - filepath = Path(filepath) - remove_fanseg_file(filepath) - -def convert_png_to_jpg_file (filepath): - filepath = Path(filepath) - - if filepath.suffix != '.png': - return - - dflpng = DFLPNG.load (str(filepath) ) - if dflpng is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - return - - dfl_dict = dflpng.getDFLDictData() - - img = cv2_imread (str(filepath)) - new_filepath = str(filepath.parent / (filepath.stem + '.jpg')) - cv2_imwrite ( new_filepath, img, [int(cv2.IMWRITE_JPEG_QUALITY), 85]) - - DFLJPG.embed_data( new_filepath, - face_type=dfl_dict.get('face_type', None), - landmarks=dfl_dict.get('landmarks', None), - ie_polys=dfl_dict.get('ie_polys', None), - source_filename=dfl_dict.get('source_filename', None), - source_rect=dfl_dict.get('source_rect', None), - source_landmarks=dfl_dict.get('source_landmarks', None) ) - - filepath.unlink() - -def convert_png_to_jpg_folder (input_path): - input_path = Path(input_path) - - io.log_info ("Converting PNG to JPG...\r\n") - - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Converting"): - filepath = Path(filepath) - convert_png_to_jpg_file(filepath) - -def add_landmarks_debug_images(input_path): - io.log_info ("Adding landmarks debug images...") - - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): - filepath = Path(filepath) - - img = cv2_imread(str(filepath)) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - - if img is not None: - face_landmarks = dflimg.get_landmarks() - LandmarksProcessor.draw_landmarks(img, face_landmarks, transparent_mask=True, ie_polys=dflimg.get_ie_polys() ) - - output_file = '{}{}'.format( str(Path(str(input_path)) / filepath.stem), '_debug.jpg') - cv2_imwrite(output_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) - -def recover_original_aligned_filename(input_path): - io.log_info ("Recovering original aligned filename...") - - files = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - - files += [ [filepath, None, dflimg.get_source_filename(), False] ] - - files_len = len(files) - for i in io.progress_bar_generator( range(files_len), "Sorting" ): - fp, _, sf, converted = files[i] - - if converted: - continue - - sf_stem = Path(sf).stem - - files[i][1] = fp.parent / ( sf_stem + '_0' + fp.suffix ) - files[i][3] = True - c = 1 - - for j in range(i+1, files_len): - fp_j, _, sf_j, converted_j = files[j] - 
if converted_j: - continue - - if sf_j == sf: - files[j][1] = fp_j.parent / ( sf_stem + ('_%d' % (c)) + fp_j.suffix ) - files[j][3] = True - c += 1 - - for file in io.progress_bar_generator( files, "Renaming", leave=False ): - fs, _, _, _ = file - dst = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) - try: - fs.rename (dst) - except: - io.log_err ('fail to rename %s' % (fs.name) ) - - for file in io.progress_bar_generator( files, "Renaming" ): - fs, fd, _, _ = file - fs = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) - try: - fs.rename (fd) - except: - io.log_err ('fail to rename %s' % (fs.name) ) +import cv2 +from pathlib import Path +from utils import Path_utils +from utils.DFLPNG import DFLPNG +from utils.DFLJPG import DFLJPG +from utils.cv2_utils import * +from facelib import LandmarksProcessor +from interact import interact as io + +def remove_fanseg_file (filepath): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + return + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + return + + dflimg.remove_fanseg_mask() + dflimg.embed_and_set( str(filepath) ) + + +def remove_fanseg_folder(input_path): + input_path = Path(input_path) + + io.log_info ("Removing fanseg mask...\r\n") + + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): + filepath = Path(filepath) + remove_fanseg_file(filepath) + +def convert_png_to_jpg_file (filepath): + filepath = Path(filepath) + + if filepath.suffix != '.png': + return + + dflpng = DFLPNG.load (str(filepath) ) + if dflpng is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + return + + dfl_dict = dflpng.getDFLDictData() + + img = cv2_imread (str(filepath)) + new_filepath = str(filepath.parent / (filepath.stem + '.jpg')) + cv2_imwrite ( new_filepath, img, [int(cv2.IMWRITE_JPEG_QUALITY), 85]) + + DFLJPG.embed_data( new_filepath, + face_type=dfl_dict.get('face_type', None), + landmarks=dfl_dict.get('landmarks', None), + ie_polys=dfl_dict.get('ie_polys', None), + source_filename=dfl_dict.get('source_filename', None), + source_rect=dfl_dict.get('source_rect', None), + source_landmarks=dfl_dict.get('source_landmarks', None) ) + + filepath.unlink() + +def convert_png_to_jpg_folder (input_path): + input_path = Path(input_path) + + io.log_info ("Converting PNG to JPG...\r\n") + + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Converting"): + filepath = Path(filepath) + convert_png_to_jpg_file(filepath) + +def add_landmarks_debug_images(input_path): + io.log_info ("Adding landmarks debug images...") + + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + filepath = Path(filepath) + + img = cv2_imread(str(filepath)) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + + if img is not None: + face_landmarks = dflimg.get_landmarks() + LandmarksProcessor.draw_landmarks(img, face_landmarks, transparent_mask=True, ie_polys=dflimg.get_ie_polys() ) + + output_file = '{}{}'.format( str(Path(str(input_path)) / filepath.stem), '_debug.jpg') + cv2_imwrite(output_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) + +def 
recover_original_aligned_filename(input_path): + io.log_info ("Recovering original aligned filename...") + + files = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + + files += [ [filepath, None, dflimg.get_source_filename(), False] ] + + files_len = len(files) + for i in io.progress_bar_generator( range(files_len), "Sorting" ): + fp, _, sf, converted = files[i] + + if converted: + continue + + sf_stem = Path(sf).stem + + files[i][1] = fp.parent / ( sf_stem + '_0' + fp.suffix ) + files[i][3] = True + c = 1 + + for j in range(i+1, files_len): + fp_j, _, sf_j, converted_j = files[j] + if converted_j: + continue + + if sf_j == sf: + files[j][1] = fp_j.parent / ( sf_stem + ('_%d' % (c)) + fp_j.suffix ) + files[j][3] = True + c += 1 + + for file in io.progress_bar_generator( files, "Renaming", leave=False ): + fs, _, _, _ = file + dst = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) + try: + fs.rename (dst) + except: + io.log_err ('fail to rename %s' % (fs.name) ) + + for file in io.progress_bar_generator( files, "Renaming" ): + fs, fd, _, _ = file + fs = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) + try: + fs.rename (fd) + except: + io.log_err ('fail to rename %s' % (fs.name) ) diff --git a/mainscripts/VideoEd.py b/mainscripts/VideoEd.py index 2951a06..28e69d4 100644 --- a/mainscripts/VideoEd.py +++ b/mainscripts/VideoEd.py @@ -1,199 +1,199 @@ -import subprocess -import numpy as np -import ffmpeg -from pathlib import Path -from utils import Path_utils -from interact import interact as io - -def extract_video(input_file, output_dir, output_ext=None, fps=None): - input_file_path = Path(input_file) - output_path = Path(output_dir) - - if not output_path.exists(): - output_path.mkdir(exist_ok=True) - - - if input_file_path.suffix == '.*': - input_file_path = Path_utils.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) - else: - if not input_file_path.exists(): - input_file_path = None - - if input_file_path is None: - io.log_err("input_file not found.") - return - - if fps is None: - fps = io.input_int ("Enter FPS ( ?:help skip:fullfps ) : ", 0, help_message="How many frames of every second of the video will be extracted.") - - if output_ext is None: - output_ext = io.input_str ("Output image format? 
( jpg png ?:help skip:png ) : ", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") - - for filename in Path_utils.get_image_paths (output_path, ['.'+output_ext]): - Path(filename).unlink() - - job = ffmpeg.input(str(input_file_path)) - - kwargs = {'pix_fmt': 'rgb24'} - if fps != 0: - kwargs.update ({'r':str(fps)}) - - if output_ext == 'jpg': - kwargs.update ({'q:v':'2'}) #highest quality for jpg - - job = job.output( str (output_path / ('%5d.'+output_ext)), **kwargs ) - - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) - -def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, bitrate=None): - input_file_path = Path(input_file) - if input_file_path is None: - io.log_err("input_file not found.") - return - - output_file_path = input_file_path.parent / (input_file_path.stem + "_cut" + input_file_path.suffix) - - if from_time is None: - from_time = io.input_str ("From time (skip: 00:00:00.000) : ", "00:00:00.000") - - if to_time is None: - to_time = io.input_str ("To time (skip: 00:00:00.000) : ", "00:00:00.000") - - if audio_track_id is None: - audio_track_id = io.input_int ("Specify audio track id. ( skip:0 ) : ", 0) - - if bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:25) : ", 25) ) - - kwargs = {"c:v": "libx264", - "b:v": "%dM" %(bitrate), - "pix_fmt": "yuv420p", - } - - job = ffmpeg.input(str(input_file_path), ss=from_time, to=to_time) - - job_v = job['v:0'] - job_a = job['a:' + str(audio_track_id) + '?' ] - - job = ffmpeg.output(job_v, job_a, str(output_file_path), **kwargs).overwrite_output() - - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) - -def denoise_image_sequence( input_dir, ext=None, factor=None ): - input_path = Path(input_dir) - - if not input_path.exists(): - io.log_err("input_dir not found.") - return - - if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") - - if factor is None: - factor = np.clip ( io.input_int ("Denoise factor? (1-20 default:5) : ", 5), 1, 20 ) - - job = ( ffmpeg - .input(str ( input_path / ('%5d.'+ext) ) ) - .filter("hqdn3d", factor, factor, 5,5) - .output(str ( input_path / ('%5d.'+ext) ) ) - ) - - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) - -def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ): - input_path = Path(input_dir) - output_file_path = Path(output_file) - reference_file_path = Path(reference_file) if reference_file is not None else None - - if not input_path.exists(): - io.log_err("input_dir not found.") - return - - if not output_file_path.parent.exists(): - output_file_path.parent.mkdir(parents=True, exist_ok=True) - return - - out_ext = output_file_path.suffix - - if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") - - if lossless is None: - lossless = io.input_bool ("Use lossless codec ? 
( default:no ) : ", False) - - video_id = None - audio_id = None - ref_in_a = None - if reference_file_path is not None: - if reference_file_path.suffix == '.*': - reference_file_path = Path_utils.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) - else: - if not reference_file_path.exists(): - reference_file_path = None - - if reference_file_path is None: - io.log_err("reference_file not found.") - return - - #probing reference file - probe = ffmpeg.probe (str(reference_file_path)) - - #getting first video and audio streams id with fps - for stream in probe['streams']: - if video_id is None and stream['codec_type'] == 'video': - video_id = stream['index'] - fps = stream['r_frame_rate'] - - if audio_id is None and stream['codec_type'] == 'audio': - audio_id = stream['index'] - - if audio_id is not None: - #has audio track - ref_in_a = ffmpeg.input (str(reference_file_path))[str(audio_id)] - - if fps is None: - #if fps not specified and not overwritten by reference-file - fps = max (1, io.input_int ("FPS ? (default:25) : ", 25) ) - - if not lossless and bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:16) : ", 16) ) - - i_in = ffmpeg.input(str (input_path / ('%5d.'+ext)), r=fps) - - output_args = [i_in] - - if ref_in_a is not None: - output_args += [ref_in_a] - - output_args += [str (output_file_path)] - - output_kwargs = {} - - if lossless: - output_kwargs.update ({"c:v": "png" - }) - else: - output_kwargs.update ({"c:v": "libx264", - "b:v": "%dM" %(bitrate), - "pix_fmt": "yuv420p", - }) - - output_kwargs.update ({"c:a": "aac", - "b:a": "192k", - "ar" : "48000" - }) - - job = ( ffmpeg.output(*output_args, **output_kwargs).overwrite_output() ) - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) +import subprocess +import numpy as np +import ffmpeg +from pathlib import Path +from utils import Path_utils +from interact import interact as io + +def extract_video(input_file, output_dir, output_ext=None, fps=None): + input_file_path = Path(input_file) + output_path = Path(output_dir) + + if not output_path.exists(): + output_path.mkdir(exist_ok=True) + + + if input_file_path.suffix == '.*': + input_file_path = Path_utils.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) + else: + if not input_file_path.exists(): + input_file_path = None + + if input_file_path is None: + io.log_err("input_file not found.") + return + + if fps is None: + fps = io.input_int ("Enter FPS ( ?:help skip:fullfps ) : ", 0, help_message="How many frames of every second of the video will be extracted.") + + if output_ext is None: + output_ext = io.input_str ("Output image format? 
( jpg png ?:help skip:png ) : ", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") + + for filename in Path_utils.get_image_paths (output_path, ['.'+output_ext]): + Path(filename).unlink() + + job = ffmpeg.input(str(input_file_path)) + + kwargs = {'pix_fmt': 'rgb24'} + if fps != 0: + kwargs.update ({'r':str(fps)}) + + if output_ext == 'jpg': + kwargs.update ({'q:v':'2'}) #highest quality for jpg + + job = job.output( str (output_path / ('%5d.'+output_ext)), **kwargs ) + + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) + +def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, bitrate=None): + input_file_path = Path(input_file) + if input_file_path is None: + io.log_err("input_file not found.") + return + + output_file_path = input_file_path.parent / (input_file_path.stem + "_cut" + input_file_path.suffix) + + if from_time is None: + from_time = io.input_str ("From time (skip: 00:00:00.000) : ", "00:00:00.000") + + if to_time is None: + to_time = io.input_str ("To time (skip: 00:00:00.000) : ", "00:00:00.000") + + if audio_track_id is None: + audio_track_id = io.input_int ("Specify audio track id. ( skip:0 ) : ", 0) + + if bitrate is None: + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:25) : ", 25) ) + + kwargs = {"c:v": "libx264", + "b:v": "%dM" %(bitrate), + "pix_fmt": "yuv420p", + } + + job = ffmpeg.input(str(input_file_path), ss=from_time, to=to_time) + + job_v = job['v:0'] + job_a = job['a:' + str(audio_track_id) + '?' ] + + job = ffmpeg.output(job_v, job_a, str(output_file_path), **kwargs).overwrite_output() + + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) + +def denoise_image_sequence( input_dir, ext=None, factor=None ): + input_path = Path(input_dir) + + if not input_path.exists(): + io.log_err("input_dir not found.") + return + + if ext is None: + ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + + if factor is None: + factor = np.clip ( io.input_int ("Denoise factor? (1-20 default:5) : ", 5), 1, 20 ) + + job = ( ffmpeg + .input(str ( input_path / ('%5d.'+ext) ) ) + .filter("hqdn3d", factor, factor, 5,5) + .output(str ( input_path / ('%5d.'+ext) ) ) + ) + + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) + +def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ): + input_path = Path(input_dir) + output_file_path = Path(output_file) + reference_file_path = Path(reference_file) if reference_file is not None else None + + if not input_path.exists(): + io.log_err("input_dir not found.") + return + + if not output_file_path.parent.exists(): + output_file_path.parent.mkdir(parents=True, exist_ok=True) + return + + out_ext = output_file_path.suffix + + if ext is None: + ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + + if lossless is None: + lossless = io.input_bool ("Use lossless codec ? 
( default:no ) : ", False) + + video_id = None + audio_id = None + ref_in_a = None + if reference_file_path is not None: + if reference_file_path.suffix == '.*': + reference_file_path = Path_utils.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) + else: + if not reference_file_path.exists(): + reference_file_path = None + + if reference_file_path is None: + io.log_err("reference_file not found.") + return + + #probing reference file + probe = ffmpeg.probe (str(reference_file_path)) + + #getting first video and audio streams id with fps + for stream in probe['streams']: + if video_id is None and stream['codec_type'] == 'video': + video_id = stream['index'] + fps = stream['r_frame_rate'] + + if audio_id is None and stream['codec_type'] == 'audio': + audio_id = stream['index'] + + if audio_id is not None: + #has audio track + ref_in_a = ffmpeg.input (str(reference_file_path))[str(audio_id)] + + if fps is None: + #if fps not specified and not overwritten by reference-file + fps = max (1, io.input_int ("FPS ? (default:25) : ", 25) ) + + if not lossless and bitrate is None: + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:16) : ", 16) ) + + i_in = ffmpeg.input(str (input_path / ('%5d.'+ext)), r=fps) + + output_args = [i_in] + + if ref_in_a is not None: + output_args += [ref_in_a] + + output_args += [str (output_file_path)] + + output_kwargs = {} + + if lossless: + output_kwargs.update ({"c:v": "png" + }) + else: + output_kwargs.update ({"c:v": "libx264", + "b:v": "%dM" %(bitrate), + "pix_fmt": "yuv420p", + }) + + output_kwargs.update ({"c:a": "aac", + "b:a": "192k", + "ar" : "48000" + }) + + job = ( ffmpeg.output(*output_args, **output_kwargs).overwrite_output() ) + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) diff --git a/mathlib/__init__.py b/mathlib/__init__.py index a11e725..50061ee 100644 --- a/mathlib/__init__.py +++ b/mathlib/__init__.py @@ -1,25 +1,25 @@ -import numpy as np -import math -from .umeyama import umeyama - -def get_power_of_two(x): - i = 0 - while (1 << i) < x: - i += 1 - return i - -def rotationMatrixToEulerAngles(R) : - sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) - singular = sy < 1e-6 - if not singular : - x = math.atan2(R[2,1] , R[2,2]) - y = math.atan2(-R[2,0], sy) - z = math.atan2(R[1,0], R[0,0]) - else : - x = math.atan2(-R[1,2], R[1,1]) - y = math.atan2(-R[2,0], sy) - z = 0 - return np.array([x, y, z]) - -def polygon_area(x,y): - return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) +import numpy as np +import math +from .umeyama import umeyama + +def get_power_of_two(x): + i = 0 + while (1 << i) < x: + i += 1 + return i + +def rotationMatrixToEulerAngles(R) : + sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) + singular = sy < 1e-6 + if not singular : + x = math.atan2(R[2,1] , R[2,2]) + y = math.atan2(-R[2,0], sy) + z = math.atan2(R[1,0], R[0,0]) + else : + x = math.atan2(-R[1,2], R[1,1]) + y = math.atan2(-R[2,0], sy) + z = 0 + return np.array([x, y, z]) + +def polygon_area(x,y): + return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) diff --git a/mathlib/umeyama.py b/mathlib/umeyama.py index 7c6b2d0..2c6491a 100644 --- a/mathlib/umeyama.py +++ b/mathlib/umeyama.py @@ -1,71 +1,71 @@ -import numpy as np - -def umeyama(src, dst, estimate_scale): - """Estimate N-D similarity transformation with or without scaling. - Parameters - ---------- - src : (M, N) array - Source coordinates. - dst : (M, N) array - Destination coordinates. 
- estimate_scale : bool - Whether to estimate scaling factor. - Returns - ------- - T : (N + 1, N + 1) - The homogeneous similarity transformation matrix. The matrix contains - NaN values only if the problem is not well-conditioned. - References - ---------- - .. [1] "Least-squares estimation of transformation parameters between two - point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 - """ - - num = src.shape[0] - dim = src.shape[1] - - # Compute mean of src and dst. - src_mean = src.mean(axis=0) - dst_mean = dst.mean(axis=0) - - # Subtract mean from src and dst. - src_demean = src - src_mean - dst_demean = dst - dst_mean - - # Eq. (38). - A = np.dot(dst_demean.T, src_demean) / num - - # Eq. (39). - d = np.ones((dim,), dtype=np.double) - if np.linalg.det(A) < 0: - d[dim - 1] = -1 - - T = np.eye(dim + 1, dtype=np.double) - - U, S, V = np.linalg.svd(A) - - # Eq. (40) and (43). - rank = np.linalg.matrix_rank(A) - if rank == 0: - return np.nan * T - elif rank == dim - 1: - if np.linalg.det(U) * np.linalg.det(V) > 0: - T[:dim, :dim] = np.dot(U, V) - else: - s = d[dim - 1] - d[dim - 1] = -1 - T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) - d[dim - 1] = s - else: - T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T)) - - if estimate_scale: - # Eq. (41) and (42). - scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) - else: - scale = 1.0 - - T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) - T[:dim, :dim] *= scale - - return T +import numpy as np + +def umeyama(src, dst, estimate_scale): + """Estimate N-D similarity transformation with or without scaling. + Parameters + ---------- + src : (M, N) array + Source coordinates. + dst : (M, N) array + Destination coordinates. + estimate_scale : bool + Whether to estimate scaling factor. + Returns + ------- + T : (N + 1, N + 1) + The homogeneous similarity transformation matrix. The matrix contains + NaN values only if the problem is not well-conditioned. + References + ---------- + .. [1] "Least-squares estimation of transformation parameters between two + point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 + """ + + num = src.shape[0] + dim = src.shape[1] + + # Compute mean of src and dst. + src_mean = src.mean(axis=0) + dst_mean = dst.mean(axis=0) + + # Subtract mean from src and dst. + src_demean = src - src_mean + dst_demean = dst - dst_mean + + # Eq. (38). + A = np.dot(dst_demean.T, src_demean) / num + + # Eq. (39). + d = np.ones((dim,), dtype=np.double) + if np.linalg.det(A) < 0: + d[dim - 1] = -1 + + T = np.eye(dim + 1, dtype=np.double) + + U, S, V = np.linalg.svd(A) + + # Eq. (40) and (43). + rank = np.linalg.matrix_rank(A) + if rank == 0: + return np.nan * T + elif rank == dim - 1: + if np.linalg.det(U) * np.linalg.det(V) > 0: + T[:dim, :dim] = np.dot(U, V) + else: + s = d[dim - 1] + d[dim - 1] = -1 + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) + d[dim - 1] = s + else: + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T)) + + if estimate_scale: + # Eq. (41) and (42). 
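+        # np.dot(S, d) equals trace(diag(S) @ diag(d)), so this is Umeyama's
+        # c = trace(D S) / sigma_src^2: the sum of sign-corrected singular values
+        # over the total variance of the demeaned source points. d flips the
+        # smallest singular value when det(A) < 0, so no reflection is introduced.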
+ scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) + else: + scale = 1.0 + + T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) + T[:dim, :dim] *= scale + + return T diff --git a/models/ModelBase.py b/models/ModelBase.py index c252d0d..62300a8 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -1,615 +1,615 @@ -import colorsys -import inspect -import json -import os -import pickle -import shutil -import time -from pathlib import Path - -import cv2 -import numpy as np - -import imagelib -from interact import interact as io -from nnlib import nnlib -from samplelib import SampleGeneratorBase -from utils import Path_utils, std_utils -from utils.cv2_utils import * - -''' -You can implement your own model. Check examples. -''' -class ModelBase(object): - - - def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, pretraining_data_path=None, debug = False, device_args = None, - ask_enable_autobackup=True, - ask_write_preview_history=True, - ask_target_iter=True, - ask_batch_size=True, - ask_sort_by_yaw=True, - ask_random_flip=True, - ask_src_scale_mod=True): - - device_args['force_gpu_idx'] = device_args.get('force_gpu_idx',-1) - device_args['cpu_only'] = device_args.get('cpu_only',False) - - if device_args['force_gpu_idx'] == -1 and not device_args['cpu_only']: - idxs_names_list = nnlib.device.getValidDevicesIdxsWithNamesList() - if len(idxs_names_list) > 1: - io.log_info ("You have multi GPUs in a system: ") - for idx, name in idxs_names_list: - io.log_info ("[%d] : %s" % (idx, name) ) - - device_args['force_gpu_idx'] = io.input_int("Which GPU idx to choose? ( skip: best GPU ) : ", -1, [ x[0] for x in idxs_names_list] ) - self.device_args = device_args - - self.device_config = nnlib.DeviceConfig(allow_growth=False, **self.device_args) - - io.log_info ("Loading model...") - - self.model_path = model_path - self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) - - self.training_data_src_path = training_data_src_path - self.training_data_dst_path = training_data_dst_path - self.pretraining_data_path = pretraining_data_path - - self.src_images_paths = None - self.dst_images_paths = None - self.src_yaw_images_paths = None - self.dst_yaw_images_paths = None - self.src_data_generator = None - self.dst_data_generator = None - self.debug = debug - self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None) - - self.iter = 0 - self.options = {} - self.loss_history = [] - self.sample_for_preview = None - - model_data = {} - if self.model_data_path.exists(): - model_data = pickle.loads ( self.model_data_path.read_bytes() ) - self.iter = max( model_data.get('iter',0), model_data.get('epoch',0) ) - if 'epoch' in self.options: - self.options.pop('epoch') - if self.iter != 0: - self.options = model_data['options'] - self.loss_history = model_data.get('loss_history', []) - self.sample_for_preview = model_data.get('sample_for_preview', None) - - ask_override = self.is_training_mode and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) - - yn_str = {True:'y',False:'n'} - - if self.iter == 0: - io.log_info ("\nModel first run. Enter model options as default for each run.") - - if ask_enable_autobackup and (self.iter == 0 or ask_override): - default_autobackup = False if self.iter == 0 else self.options.get('autobackup',False) - self.options['autobackup'] = io.input_bool("Enable autobackup? 
(y/n ?:help skip:%s) : " % (yn_str[default_autobackup]) , default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") - else: - self.options['autobackup'] = self.options.get('autobackup', False) - - if ask_write_preview_history and (self.iter == 0 or ask_override): - default_write_preview_history = False if self.iter == 0 else self.options.get('write_preview_history',False) - self.options['write_preview_history'] = io.input_bool("Write preview history? (y/n ?:help skip:%s) : " % (yn_str[default_write_preview_history]) , default_write_preview_history, help_message="Preview history will be writed to _history folder.") - else: - self.options['write_preview_history'] = self.options.get('write_preview_history', False) - - if (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_support_windows(): - choose_preview_history = io.input_bool("Choose image for the preview history? (y/n skip:%s) : " % (yn_str[False]) , False) - else: - choose_preview_history = False - - if ask_target_iter: - if (self.iter == 0 or ask_override): - self.options['target_iter'] = max(0, io.input_int("Target iteration (skip:unlimited/default) : ", 0)) - else: - self.options['target_iter'] = max(model_data.get('target_iter',0), self.options.get('target_epoch',0)) - if 'target_epoch' in self.options: - self.options.pop('target_epoch') - - if ask_batch_size and (self.iter == 0 or ask_override): - default_batch_size = 0 if self.iter == 0 else self.options.get('batch_size',0) - self.options['batch_size'] = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) - else: - self.options['batch_size'] = self.options.get('batch_size', 0) - - if ask_sort_by_yaw: - if (self.iter == 0 or ask_override): - default_sort_by_yaw = self.options.get('sort_by_yaw', False) - self.options['sort_by_yaw'] = io.input_bool("Feed faces to network sorted by yaw? (y/n ?:help skip:%s) : " % (yn_str[default_sort_by_yaw]), default_sort_by_yaw, help_message="NN will not learn src face directions that don't match dst face directions. Do not use if the dst face has hair that covers the jaw." ) - else: - self.options['sort_by_yaw'] = self.options.get('sort_by_yaw', False) - - if ask_random_flip: - if (self.iter == 0 or ask_override): - self.options['random_flip'] = io.input_bool("Flip faces randomly? 
(y/n ?:help skip:y) : ", True, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") - else: - self.options['random_flip'] = self.options.get('random_flip', True) - - if ask_src_scale_mod: - if (self.iter == 0): - self.options['src_scale_mod'] = np.clip( io.input_int("Src face scale modifier % ( -30...30, ?:help skip:0) : ", 0, help_message="If src face shape is wider than dst, try to decrease this value to get a better result."), -30, 30) - else: - self.options['src_scale_mod'] = self.options.get('src_scale_mod', 0) - - self.autobackup = self.options.get('autobackup', False) - if not self.autobackup and 'autobackup' in self.options: - self.options.pop('autobackup') - - self.write_preview_history = self.options.get('write_preview_history', False) - if not self.write_preview_history and 'write_preview_history' in self.options: - self.options.pop('write_preview_history') - - self.target_iter = self.options.get('target_iter',0) - if self.target_iter == 0 and 'target_iter' in self.options: - self.options.pop('target_iter') - - self.batch_size = self.options.get('batch_size',0) - self.sort_by_yaw = self.options.get('sort_by_yaw',False) - self.random_flip = self.options.get('random_flip',True) - - self.src_scale_mod = self.options.get('src_scale_mod',0) - if self.src_scale_mod == 0 and 'src_scale_mod' in self.options: - self.options.pop('src_scale_mod') - - self.onInitializeOptions(self.iter == 0, ask_override) - - nnlib.import_all(self.device_config) - self.keras = nnlib.keras - self.K = nnlib.keras.backend - - self.onInitialize() - - self.options['batch_size'] = self.batch_size - - if self.debug or self.batch_size == 0: - self.batch_size = 1 - - if self.is_training_mode: - if self.device_args['force_gpu_idx'] == -1: - self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) - self.autobackups_path = self.model_path / ( '%s_autobackups' % (self.get_model_name()) ) - else: - self.preview_history_path = self.model_path / ( '%d_%s_history' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) - self.autobackups_path = self.model_path / ( '%d_%s_autobackups' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) - - if self.autobackup: - self.autobackup_current_hour = time.localtime().tm_hour - - if not self.autobackups_path.exists(): - self.autobackups_path.mkdir(exist_ok=True) - - if self.write_preview_history or io.is_colab(): - if not self.preview_history_path.exists(): - self.preview_history_path.mkdir(exist_ok=True) - else: - if self.iter == 0: - for filename in Path_utils.get_image_paths(self.preview_history_path): - Path(filename).unlink() - - if self.generator_list is None: - raise ValueError( 'You didnt set_training_data_generators()') - else: - for i, generator in enumerate(self.generator_list): - if not isinstance(generator, SampleGeneratorBase): - raise ValueError('training data generator is not subclass of SampleGeneratorBase') - - if self.sample_for_preview is None or choose_preview_history: - if choose_preview_history and io.is_support_windows(): - wnd_name = "[p] - next. [enter] - confirm." 
- io.named_window(wnd_name) - io.capture_keys(wnd_name) - choosed = False - while not choosed: - self.sample_for_preview = self.generate_next_sample() - preview = self.get_static_preview() - io.show_image( wnd_name, (preview*255).astype(np.uint8) ) - - while True: - key_events = io.get_key_events(wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - if key == ord('\n') or key == ord('\r'): - choosed = True - break - elif key == ord('p'): - break - - try: - io.process_messages(0.1) - except KeyboardInterrupt: - choosed = True - - io.destroy_window(wnd_name) - else: - self.sample_for_preview = self.generate_next_sample() - self.last_sample = self.sample_for_preview - model_summary_text = [] - - model_summary_text += ["===== Model summary ====="] - model_summary_text += ["== Model name: " + self.get_model_name()] - model_summary_text += ["=="] - model_summary_text += ["== Current iteration: " + str(self.iter)] - model_summary_text += ["=="] - model_summary_text += ["== Model options:"] - for key in self.options.keys(): - model_summary_text += ["== |== %s : %s" % (key, self.options[key])] - - if self.device_config.multi_gpu: - model_summary_text += ["== |== multi_gpu : True "] - - model_summary_text += ["== Running on:"] - if self.device_config.cpu_only: - model_summary_text += ["== |== [CPU]"] - else: - for idx in self.device_config.gpu_idxs: - model_summary_text += ["== |== [%d : %s]" % (idx, nnlib.device.getDeviceName(idx))] - - if not self.device_config.cpu_only and self.device_config.gpu_vram_gb[0] == 2: - model_summary_text += ["=="] - model_summary_text += ["== WARNING: You are using 2GB GPU. Result quality may be significantly decreased."] - model_summary_text += ["== If training does not start, close all programs and try again."] - model_summary_text += ["== Also you can disable Windows Aero Desktop to get extra free VRAM."] - model_summary_text += ["=="] - - model_summary_text += ["========================="] - model_summary_text = "\r\n".join (model_summary_text) - self.model_summary_text = model_summary_text - io.log_info(model_summary_text) - - #overridable - def onInitializeOptions(self, is_first_run, ask_override): - pass - - #overridable - def onInitialize(self): - ''' - initialize your keras models - - store and retrieve your model options in self.options[''] - - check example - ''' - pass - - #overridable - def onSave(self): - #save your keras models here - pass - - #overridable - def onTrainOneIter(self, sample, generator_list): - #train your keras models here - - #return array of losses - return ( ('loss_src', 0), ('loss_dst', 0) ) - - #overridable - def onGetPreview(self, sample): - #you can return multiple previews - #return [ ('preview_name',preview_rgb), ... ] - return [] - - #overridable if you want model name differs from folder name - def get_model_name(self): - return Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] - - #overridable , return [ [model, filename],... 
] list - def get_model_filename_list(self): - return [] - - #overridable - def get_converter(self): - raise NotImplementedError - #return existing or your own converter which derived from base - - def get_target_iter(self): - return self.target_iter - - def is_reached_iter_goal(self): - return self.target_iter != 0 and self.iter >= self.target_iter - - #multi gpu in keras actually is fake and doesn't work for training https://github.com/keras-team/keras/issues/11976 - #def to_multi_gpu_model_if_possible (self, models_list): - # if len(self.device_config.gpu_idxs) > 1: - # #make batch_size to divide on GPU count without remainder - # self.batch_size = int( self.batch_size / len(self.device_config.gpu_idxs) ) - # if self.batch_size == 0: - # self.batch_size = 1 - # self.batch_size *= len(self.device_config.gpu_idxs) - # - # result = [] - # for model in models_list: - # for i in range( len(model.output_names) ): - # model.output_names = 'output_%d' % (i) - # result += [ nnlib.keras.utils.multi_gpu_model( model, self.device_config.gpu_idxs ) ] - # - # return result - # else: - # return models_list - - def get_previews(self): - return self.onGetPreview ( self.last_sample ) - - def get_static_preview(self): - return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr - - def save(self): - summary_path = self.get_strpath_storage_for_file('summary.txt') - Path( summary_path ).write_text(self.model_summary_text) - self.onSave() - - model_data = { - 'iter': self.iter, - 'options': self.options, - 'loss_history': self.loss_history, - 'sample_for_preview' : self.sample_for_preview - } - self.model_data_path.write_bytes( pickle.dumps(model_data) ) - - bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] - bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] - - if self.autobackup: - current_hour = time.localtime().tm_hour - if self.autobackup_current_hour != current_hour: - self.autobackup_current_hour = current_hour - - for i in range(15,0,-1): - idx_str = '%.2d' % i - next_idx_str = '%.2d' % (i+1) - - idx_backup_path = self.autobackups_path / idx_str - next_idx_packup_path = self.autobackups_path / next_idx_str - - if idx_backup_path.exists(): - if i == 15: - Path_utils.delete_all_files(idx_backup_path) - else: - next_idx_packup_path.mkdir(exist_ok=True) - Path_utils.move_all_files (idx_backup_path, next_idx_packup_path) - - if i == 1: - idx_backup_path.mkdir(exist_ok=True) - for filename in bckp_filename_list: - shutil.copy ( str(filename), str(idx_backup_path / Path(filename).name) ) - - previews = self.get_previews() - plist = [] - for i in range(len(previews)): - name, bgr = previews[i] - plist += [ (bgr, idx_backup_path / ( ('preview_%s.jpg') % (name)) ) ] - - for preview, filepath in plist: - preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) - img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) - cv2_imwrite (filepath, img ) - - def load_weights_safe(self, model_filename_list, optimizer_filename_list=[]): - for model, filename in model_filename_list: - filename = self.get_strpath_storage_for_file(filename) - if Path(filename).exists(): - model.load_weights(filename) - - if len(optimizer_filename_list) != 0: - opt_filename = self.get_strpath_storage_for_file('opt.h5') - if Path(opt_filename).exists(): - try: - with open(opt_filename, "rb") as f: - d = pickle.loads(f.read()) - - for x in 
optimizer_filename_list: - opt, filename = x - if filename in d: - weights = d[filename].get('weights', None) - if weights: - opt.set_weights(weights) - print("set ok") - except Exception as e: - print ("Unable to load ", opt_filename) - - - def save_weights_safe(self, model_filename_list): - for model, filename in model_filename_list: - filename = self.get_strpath_storage_for_file(filename) - model.save_weights( filename + '.tmp' ) - - rename_list = model_filename_list - - """ - #unused - , optimizer_filename_list=[] - if len(optimizer_filename_list) != 0: - opt_filename = self.get_strpath_storage_for_file('opt.h5') - - try: - d = {} - for opt, filename in optimizer_filename_list: - fd = {} - symbolic_weights = getattr(opt, 'weights') - if symbolic_weights: - fd['weights'] = self.K.batch_get_value(symbolic_weights) - - d[filename] = fd - - with open(opt_filename+'.tmp', 'wb') as f: - f.write( pickle.dumps(d) ) - - rename_list += [('', 'opt.h5')] - except Exception as e: - print ("Unable to save ", opt_filename) - """ - - for _, filename in rename_list: - filename = self.get_strpath_storage_for_file(filename) - source_filename = Path(filename+'.tmp') - if source_filename.exists(): - target_filename = Path(filename) - if target_filename.exists(): - target_filename.unlink() - source_filename.rename ( str(target_filename) ) - - def debug_one_iter(self): - images = [] - for generator in self.generator_list: - for i,batch in enumerate(next(generator)): - if len(batch.shape) == 4: - images.append( batch[0] ) - - return imagelib.equalize_and_stack_square (images) - - def generate_next_sample(self): - return [next(generator) for generator in self.generator_list] - - def train_one_iter(self): - sample = self.generate_next_sample() - iter_time = time.time() - losses = self.onTrainOneIter(sample, self.generator_list) - iter_time = time.time() - iter_time - self.last_sample = sample - - self.loss_history.append ( [float(loss[1]) for loss in losses] ) - - if self.iter % 10 == 0: - plist = [] - - if io.is_colab(): - previews = self.get_previews() - for i in range(len(previews)): - name, bgr = previews[i] - plist += [ (bgr, self.get_strpath_storage_for_file('preview_%s.jpg' % (name) ) ) ] - - if self.write_preview_history: - plist += [ (self.get_static_preview(), str (self.preview_history_path / ('%.6d.jpg' % (self.iter))) ) ] - - for preview, filepath in plist: - preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) - img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) - cv2_imwrite (filepath, img ) - - - self.iter += 1 - - return self.iter, iter_time - - def pass_one_iter(self): - self.last_sample = self.generate_next_sample() - - def finalize(self): - nnlib.finalize_all() - - def is_first_run(self): - return self.iter == 0 - - def is_debug(self): - return self.debug - - def set_batch_size(self, batch_size): - self.batch_size = batch_size - - def get_batch_size(self): - return self.batch_size - - def get_iter(self): - return self.iter - - def get_loss_history(self): - return self.loss_history - - def set_training_data_generators (self, generator_list): - self.generator_list = generator_list - - def get_training_data_generators (self): - return self.generator_list - - def get_model_root_path(self): - return self.model_path - - def get_strpath_storage_for_file(self, filename): - if self.device_args['force_gpu_idx'] == -1: - return str( self.model_path / ( self.get_model_name() + '_' + filename) ) - else: - return str( 
self.model_path / ( str(self.device_args['force_gpu_idx']) + '_' + self.get_model_name() + '_' + filename) ) - - def set_vram_batch_requirements (self, d): - #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} - keys = [x for x in d.keys()] - - if self.device_config.cpu_only: - if self.batch_size == 0: - self.batch_size = 2 - else: - if self.batch_size == 0: - for x in keys: - if self.device_config.gpu_vram_gb[0] <= x: - self.batch_size = d[x] - break - - if self.batch_size == 0: - self.batch_size = d[ keys[-1] ] - - @staticmethod - def get_loss_history_preview(loss_history, iter, w, c): - loss_history = np.array (loss_history.copy()) - - lh_height = 100 - lh_img = np.ones ( (lh_height,w,c) ) * 0.1 - - if len(loss_history) != 0: - loss_count = len(loss_history[0]) - lh_len = len(loss_history) - - l_per_col = lh_len / w - plist_max = [ [ max (0.0, loss_history[int(col*l_per_col)][p], - *[ loss_history[i_ab][p] - for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) - ] - ) - for p in range(loss_count) - ] - for col in range(w) - ] - - plist_min = [ [ min (plist_max[col][p], loss_history[int(col*l_per_col)][p], - *[ loss_history[i_ab][p] - for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) - ] - ) - for p in range(loss_count) - ] - for col in range(w) - ] - - plist_abs_max = np.mean(loss_history[ len(loss_history) // 5 : ]) * 2 - - for col in range(0, w): - for p in range(0,loss_count): - point_color = [1.0]*c - point_color[0:3] = colorsys.hsv_to_rgb ( p * (1.0/loss_count), 1.0, 1.0 ) - - ph_max = int ( (plist_max[col][p] / plist_abs_max) * (lh_height-1) ) - ph_max = np.clip( ph_max, 0, lh_height-1 ) - - ph_min = int ( (plist_min[col][p] / plist_abs_max) * (lh_height-1) ) - ph_min = np.clip( ph_min, 0, lh_height-1 ) - - for ph in range(ph_min, ph_max+1): - lh_img[ (lh_height-ph-1), col ] = point_color - - lh_lines = 5 - lh_line_height = (lh_height-1)/lh_lines - for i in range(0,lh_lines+1): - lh_img[ int(i*lh_line_height), : ] = (0.8,)*c - - last_line_t = int((lh_lines-1)*lh_line_height) - last_line_b = int(lh_lines*lh_line_height) - - lh_text = 'Iter: %d' % (iter) if iter != 0 else '' - - lh_img[last_line_t:last_line_b, 0:w] += imagelib.get_text_image ( (last_line_b-last_line_t,w,c), lh_text, color=[0.8]*c ) - return lh_img +import colorsys +import inspect +import json +import os +import pickle +import shutil +import time +from pathlib import Path + +import cv2 +import numpy as np + +import imagelib +from interact import interact as io +from nnlib import nnlib +from samplelib import SampleGeneratorBase +from utils import Path_utils, std_utils +from utils.cv2_utils import * + +''' +You can implement your own model. Check examples. 
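+A subclass overrides the #overridable callbacks below (onInitialize,
+onTrainOneIter, onGetPreview, onSave, ...); ModelBase drives the option
+prompts, saving, autobackups and preview rendering around them.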
+''' +class ModelBase(object): + + + def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, pretraining_data_path=None, debug = False, device_args = None, + ask_enable_autobackup=True, + ask_write_preview_history=True, + ask_target_iter=True, + ask_batch_size=True, + ask_sort_by_yaw=True, + ask_random_flip=True, + ask_src_scale_mod=True): + + device_args['force_gpu_idx'] = device_args.get('force_gpu_idx',-1) + device_args['cpu_only'] = device_args.get('cpu_only',False) + + if device_args['force_gpu_idx'] == -1 and not device_args['cpu_only']: + idxs_names_list = nnlib.device.getValidDevicesIdxsWithNamesList() + if len(idxs_names_list) > 1: + io.log_info ("You have multi GPUs in a system: ") + for idx, name in idxs_names_list: + io.log_info ("[%d] : %s" % (idx, name) ) + + device_args['force_gpu_idx'] = io.input_int("Which GPU idx to choose? ( skip: best GPU ) : ", -1, [ x[0] for x in idxs_names_list] ) + self.device_args = device_args + + self.device_config = nnlib.DeviceConfig(allow_growth=False, **self.device_args) + + io.log_info ("Loading model...") + + self.model_path = model_path + self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) + + self.training_data_src_path = training_data_src_path + self.training_data_dst_path = training_data_dst_path + self.pretraining_data_path = pretraining_data_path + + self.src_images_paths = None + self.dst_images_paths = None + self.src_yaw_images_paths = None + self.dst_yaw_images_paths = None + self.src_data_generator = None + self.dst_data_generator = None + self.debug = debug + self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None) + + self.iter = 0 + self.options = {} + self.loss_history = [] + self.sample_for_preview = None + + model_data = {} + if self.model_data_path.exists(): + model_data = pickle.loads ( self.model_data_path.read_bytes() ) + self.iter = max( model_data.get('iter',0), model_data.get('epoch',0) ) + if 'epoch' in self.options: + self.options.pop('epoch') + if self.iter != 0: + self.options = model_data['options'] + self.loss_history = model_data.get('loss_history', []) + self.sample_for_preview = model_data.get('sample_for_preview', None) + + ask_override = self.is_training_mode and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) + + yn_str = {True:'y',False:'n'} + + if self.iter == 0: + io.log_info ("\nModel first run. Enter model options as default for each run.") + + if ask_enable_autobackup and (self.iter == 0 or ask_override): + default_autobackup = False if self.iter == 0 else self.options.get('autobackup',False) + self.options['autobackup'] = io.input_bool("Enable autobackup? (y/n ?:help skip:%s) : " % (yn_str[default_autobackup]) , default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") + else: + self.options['autobackup'] = self.options.get('autobackup', False) + + if ask_write_preview_history and (self.iter == 0 or ask_override): + default_write_preview_history = False if self.iter == 0 else self.options.get('write_preview_history',False) + self.options['write_preview_history'] = io.input_bool("Write preview history? 
(y/n ?:help skip:%s) : " % (yn_str[default_write_preview_history]) , default_write_preview_history, help_message="Preview history will be writed to _history folder.") + else: + self.options['write_preview_history'] = self.options.get('write_preview_history', False) + + if (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_support_windows(): + choose_preview_history = io.input_bool("Choose image for the preview history? (y/n skip:%s) : " % (yn_str[False]) , False) + else: + choose_preview_history = False + + if ask_target_iter: + if (self.iter == 0 or ask_override): + self.options['target_iter'] = max(0, io.input_int("Target iteration (skip:unlimited/default) : ", 0)) + else: + self.options['target_iter'] = max(model_data.get('target_iter',0), self.options.get('target_epoch',0)) + if 'target_epoch' in self.options: + self.options.pop('target_epoch') + + if ask_batch_size and (self.iter == 0 or ask_override): + default_batch_size = 0 if self.iter == 0 else self.options.get('batch_size',0) + self.options['batch_size'] = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) + else: + self.options['batch_size'] = self.options.get('batch_size', 0) + + if ask_sort_by_yaw: + if (self.iter == 0 or ask_override): + default_sort_by_yaw = self.options.get('sort_by_yaw', False) + self.options['sort_by_yaw'] = io.input_bool("Feed faces to network sorted by yaw? (y/n ?:help skip:%s) : " % (yn_str[default_sort_by_yaw]), default_sort_by_yaw, help_message="NN will not learn src face directions that don't match dst face directions. Do not use if the dst face has hair that covers the jaw." ) + else: + self.options['sort_by_yaw'] = self.options.get('sort_by_yaw', False) + + if ask_random_flip: + if (self.iter == 0 or ask_override): + self.options['random_flip'] = io.input_bool("Flip faces randomly? 
(y/n ?:help skip:y) : ", True, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") + else: + self.options['random_flip'] = self.options.get('random_flip', True) + + if ask_src_scale_mod: + if (self.iter == 0): + self.options['src_scale_mod'] = np.clip( io.input_int("Src face scale modifier % ( -30...30, ?:help skip:0) : ", 0, help_message="If src face shape is wider than dst, try to decrease this value to get a better result."), -30, 30) + else: + self.options['src_scale_mod'] = self.options.get('src_scale_mod', 0) + + self.autobackup = self.options.get('autobackup', False) + if not self.autobackup and 'autobackup' in self.options: + self.options.pop('autobackup') + + self.write_preview_history = self.options.get('write_preview_history', False) + if not self.write_preview_history and 'write_preview_history' in self.options: + self.options.pop('write_preview_history') + + self.target_iter = self.options.get('target_iter',0) + if self.target_iter == 0 and 'target_iter' in self.options: + self.options.pop('target_iter') + + self.batch_size = self.options.get('batch_size',0) + self.sort_by_yaw = self.options.get('sort_by_yaw',False) + self.random_flip = self.options.get('random_flip',True) + + self.src_scale_mod = self.options.get('src_scale_mod',0) + if self.src_scale_mod == 0 and 'src_scale_mod' in self.options: + self.options.pop('src_scale_mod') + + self.onInitializeOptions(self.iter == 0, ask_override) + + nnlib.import_all(self.device_config) + self.keras = nnlib.keras + self.K = nnlib.keras.backend + + self.onInitialize() + + self.options['batch_size'] = self.batch_size + + if self.debug or self.batch_size == 0: + self.batch_size = 1 + + if self.is_training_mode: + if self.device_args['force_gpu_idx'] == -1: + self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) + self.autobackups_path = self.model_path / ( '%s_autobackups' % (self.get_model_name()) ) + else: + self.preview_history_path = self.model_path / ( '%d_%s_history' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) + self.autobackups_path = self.model_path / ( '%d_%s_autobackups' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) + + if self.autobackup: + self.autobackup_current_hour = time.localtime().tm_hour + + if not self.autobackups_path.exists(): + self.autobackups_path.mkdir(exist_ok=True) + + if self.write_preview_history or io.is_colab(): + if not self.preview_history_path.exists(): + self.preview_history_path.mkdir(exist_ok=True) + else: + if self.iter == 0: + for filename in Path_utils.get_image_paths(self.preview_history_path): + Path(filename).unlink() + + if self.generator_list is None: + raise ValueError( 'You didnt set_training_data_generators()') + else: + for i, generator in enumerate(self.generator_list): + if not isinstance(generator, SampleGeneratorBase): + raise ValueError('training data generator is not subclass of SampleGeneratorBase') + + if self.sample_for_preview is None or choose_preview_history: + if choose_preview_history and io.is_support_windows(): + wnd_name = "[p] - next. [enter] - confirm." 
+ io.named_window(wnd_name) + io.capture_keys(wnd_name) + choosed = False + while not choosed: + self.sample_for_preview = self.generate_next_sample() + preview = self.get_static_preview() + io.show_image( wnd_name, (preview*255).astype(np.uint8) ) + + while True: + key_events = io.get_key_events(wnd_name) + key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) + if key == ord('\n') or key == ord('\r'): + choosed = True + break + elif key == ord('p'): + break + + try: + io.process_messages(0.1) + except KeyboardInterrupt: + choosed = True + + io.destroy_window(wnd_name) + else: + self.sample_for_preview = self.generate_next_sample() + self.last_sample = self.sample_for_preview + model_summary_text = [] + + model_summary_text += ["===== Model summary ====="] + model_summary_text += ["== Model name: " + self.get_model_name()] + model_summary_text += ["=="] + model_summary_text += ["== Current iteration: " + str(self.iter)] + model_summary_text += ["=="] + model_summary_text += ["== Model options:"] + for key in self.options.keys(): + model_summary_text += ["== |== %s : %s" % (key, self.options[key])] + + if self.device_config.multi_gpu: + model_summary_text += ["== |== multi_gpu : True "] + + model_summary_text += ["== Running on:"] + if self.device_config.cpu_only: + model_summary_text += ["== |== [CPU]"] + else: + for idx in self.device_config.gpu_idxs: + model_summary_text += ["== |== [%d : %s]" % (idx, nnlib.device.getDeviceName(idx))] + + if not self.device_config.cpu_only and self.device_config.gpu_vram_gb[0] == 2: + model_summary_text += ["=="] + model_summary_text += ["== WARNING: You are using 2GB GPU. Result quality may be significantly decreased."] + model_summary_text += ["== If training does not start, close all programs and try again."] + model_summary_text += ["== Also you can disable Windows Aero Desktop to get extra free VRAM."] + model_summary_text += ["=="] + + model_summary_text += ["========================="] + model_summary_text = "\r\n".join (model_summary_text) + self.model_summary_text = model_summary_text + io.log_info(model_summary_text) + + #overridable + def onInitializeOptions(self, is_first_run, ask_override): + pass + + #overridable + def onInitialize(self): + ''' + initialize your keras models + + store and retrieve your model options in self.options[''] + + check example + ''' + pass + + #overridable + def onSave(self): + #save your keras models here + pass + + #overridable + def onTrainOneIter(self, sample, generator_list): + #train your keras models here + + #return array of losses + return ( ('loss_src', 0), ('loss_dst', 0) ) + + #overridable + def onGetPreview(self, sample): + #you can return multiple previews + #return [ ('preview_name',preview_rgb), ... ] + return [] + + #overridable if you want model name differs from folder name + def get_model_name(self): + return Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] + + #overridable , return [ [model, filename],... 
] list + def get_model_filename_list(self): + return [] + + #overridable + def get_converter(self): + raise NotImplementedError + #return existing or your own converter which derived from base + + def get_target_iter(self): + return self.target_iter + + def is_reached_iter_goal(self): + return self.target_iter != 0 and self.iter >= self.target_iter + + #multi gpu in keras actually is fake and doesn't work for training https://github.com/keras-team/keras/issues/11976 + #def to_multi_gpu_model_if_possible (self, models_list): + # if len(self.device_config.gpu_idxs) > 1: + # #make batch_size to divide on GPU count without remainder + # self.batch_size = int( self.batch_size / len(self.device_config.gpu_idxs) ) + # if self.batch_size == 0: + # self.batch_size = 1 + # self.batch_size *= len(self.device_config.gpu_idxs) + # + # result = [] + # for model in models_list: + # for i in range( len(model.output_names) ): + # model.output_names = 'output_%d' % (i) + # result += [ nnlib.keras.utils.multi_gpu_model( model, self.device_config.gpu_idxs ) ] + # + # return result + # else: + # return models_list + + def get_previews(self): + return self.onGetPreview ( self.last_sample ) + + def get_static_preview(self): + return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr + + def save(self): + summary_path = self.get_strpath_storage_for_file('summary.txt') + Path( summary_path ).write_text(self.model_summary_text) + self.onSave() + + model_data = { + 'iter': self.iter, + 'options': self.options, + 'loss_history': self.loss_history, + 'sample_for_preview' : self.sample_for_preview + } + self.model_data_path.write_bytes( pickle.dumps(model_data) ) + + bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] + bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] + + if self.autobackup: + current_hour = time.localtime().tm_hour + if self.autobackup_current_hour != current_hour: + self.autobackup_current_hour = current_hour + + for i in range(15,0,-1): + idx_str = '%.2d' % i + next_idx_str = '%.2d' % (i+1) + + idx_backup_path = self.autobackups_path / idx_str + next_idx_packup_path = self.autobackups_path / next_idx_str + + if idx_backup_path.exists(): + if i == 15: + Path_utils.delete_all_files(idx_backup_path) + else: + next_idx_packup_path.mkdir(exist_ok=True) + Path_utils.move_all_files (idx_backup_path, next_idx_packup_path) + + if i == 1: + idx_backup_path.mkdir(exist_ok=True) + for filename in bckp_filename_list: + shutil.copy ( str(filename), str(idx_backup_path / Path(filename).name) ) + + previews = self.get_previews() + plist = [] + for i in range(len(previews)): + name, bgr = previews[i] + plist += [ (bgr, idx_backup_path / ( ('preview_%s.jpg') % (name)) ) ] + + for preview, filepath in plist: + preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) + img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) + cv2_imwrite (filepath, img ) + + def load_weights_safe(self, model_filename_list, optimizer_filename_list=[]): + for model, filename in model_filename_list: + filename = self.get_strpath_storage_for_file(filename) + if Path(filename).exists(): + model.load_weights(filename) + + if len(optimizer_filename_list) != 0: + opt_filename = self.get_strpath_storage_for_file('opt.h5') + if Path(opt_filename).exists(): + try: + with open(opt_filename, "rb") as f: + d = pickle.loads(f.read()) + + for x in 
optimizer_filename_list: + opt, filename = x + if filename in d: + weights = d[filename].get('weights', None) + if weights: + opt.set_weights(weights) + print("set ok") + except Exception as e: + print ("Unable to load ", opt_filename) + + + def save_weights_safe(self, model_filename_list): + for model, filename in model_filename_list: + filename = self.get_strpath_storage_for_file(filename) + model.save_weights( filename + '.tmp' ) + + rename_list = model_filename_list + + """ + #unused + , optimizer_filename_list=[] + if len(optimizer_filename_list) != 0: + opt_filename = self.get_strpath_storage_for_file('opt.h5') + + try: + d = {} + for opt, filename in optimizer_filename_list: + fd = {} + symbolic_weights = getattr(opt, 'weights') + if symbolic_weights: + fd['weights'] = self.K.batch_get_value(symbolic_weights) + + d[filename] = fd + + with open(opt_filename+'.tmp', 'wb') as f: + f.write( pickle.dumps(d) ) + + rename_list += [('', 'opt.h5')] + except Exception as e: + print ("Unable to save ", opt_filename) + """ + + for _, filename in rename_list: + filename = self.get_strpath_storage_for_file(filename) + source_filename = Path(filename+'.tmp') + if source_filename.exists(): + target_filename = Path(filename) + if target_filename.exists(): + target_filename.unlink() + source_filename.rename ( str(target_filename) ) + + def debug_one_iter(self): + images = [] + for generator in self.generator_list: + for i,batch in enumerate(next(generator)): + if len(batch.shape) == 4: + images.append( batch[0] ) + + return imagelib.equalize_and_stack_square (images) + + def generate_next_sample(self): + return [next(generator) for generator in self.generator_list] + + def train_one_iter(self): + sample = self.generate_next_sample() + iter_time = time.time() + losses = self.onTrainOneIter(sample, self.generator_list) + iter_time = time.time() - iter_time + self.last_sample = sample + + self.loss_history.append ( [float(loss[1]) for loss in losses] ) + + if self.iter % 10 == 0: + plist = [] + + if io.is_colab(): + previews = self.get_previews() + for i in range(len(previews)): + name, bgr = previews[i] + plist += [ (bgr, self.get_strpath_storage_for_file('preview_%s.jpg' % (name) ) ) ] + + if self.write_preview_history: + plist += [ (self.get_static_preview(), str (self.preview_history_path / ('%.6d.jpg' % (self.iter))) ) ] + + for preview, filepath in plist: + preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) + img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) + cv2_imwrite (filepath, img ) + + + self.iter += 1 + + return self.iter, iter_time + + def pass_one_iter(self): + self.last_sample = self.generate_next_sample() + + def finalize(self): + nnlib.finalize_all() + + def is_first_run(self): + return self.iter == 0 + + def is_debug(self): + return self.debug + + def set_batch_size(self, batch_size): + self.batch_size = batch_size + + def get_batch_size(self): + return self.batch_size + + def get_iter(self): + return self.iter + + def get_loss_history(self): + return self.loss_history + + def set_training_data_generators (self, generator_list): + self.generator_list = generator_list + + def get_training_data_generators (self): + return self.generator_list + + def get_model_root_path(self): + return self.model_path + + def get_strpath_storage_for_file(self, filename): + if self.device_args['force_gpu_idx'] == -1: + return str( self.model_path / ( self.get_model_name() + '_' + filename) ) + else: + return str( 
self.model_path / ( str(self.device_args['force_gpu_idx']) + '_' + self.get_model_name() + '_' + filename) ) + + def set_vram_batch_requirements (self, d): + #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} + keys = [x for x in d.keys()] + + if self.device_config.cpu_only: + if self.batch_size == 0: + self.batch_size = 2 + else: + if self.batch_size == 0: + for x in keys: + if self.device_config.gpu_vram_gb[0] <= x: + self.batch_size = d[x] + break + + if self.batch_size == 0: + self.batch_size = d[ keys[-1] ] + + @staticmethod + def get_loss_history_preview(loss_history, iter, w, c): + loss_history = np.array (loss_history.copy()) + + lh_height = 100 + lh_img = np.ones ( (lh_height,w,c) ) * 0.1 + + if len(loss_history) != 0: + loss_count = len(loss_history[0]) + lh_len = len(loss_history) + + l_per_col = lh_len / w + plist_max = [ [ max (0.0, loss_history[int(col*l_per_col)][p], + *[ loss_history[i_ab][p] + for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) + ] + ) + for p in range(loss_count) + ] + for col in range(w) + ] + + plist_min = [ [ min (plist_max[col][p], loss_history[int(col*l_per_col)][p], + *[ loss_history[i_ab][p] + for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) + ] + ) + for p in range(loss_count) + ] + for col in range(w) + ] + + plist_abs_max = np.mean(loss_history[ len(loss_history) // 5 : ]) * 2 + + for col in range(0, w): + for p in range(0,loss_count): + point_color = [1.0]*c + point_color[0:3] = colorsys.hsv_to_rgb ( p * (1.0/loss_count), 1.0, 1.0 ) + + ph_max = int ( (plist_max[col][p] / plist_abs_max) * (lh_height-1) ) + ph_max = np.clip( ph_max, 0, lh_height-1 ) + + ph_min = int ( (plist_min[col][p] / plist_abs_max) * (lh_height-1) ) + ph_min = np.clip( ph_min, 0, lh_height-1 ) + + for ph in range(ph_min, ph_max+1): + lh_img[ (lh_height-ph-1), col ] = point_color + + lh_lines = 5 + lh_line_height = (lh_height-1)/lh_lines + for i in range(0,lh_lines+1): + lh_img[ int(i*lh_line_height), : ] = (0.8,)*c + + last_line_t = int((lh_lines-1)*lh_line_height) + last_line_b = int(lh_lines*lh_line_height) + + lh_text = 'Iter: %d' % (iter) if iter != 0 else '' + + lh_img[last_line_t:last_line_b, 0:w] += imagelib.get_text_image ( (last_line_b-last_line_t,w,c), lh_text, color=[0.8]*c ) + return lh_img diff --git a/models/Model_DEV_FANSEG/Model.py b/models/Model_DEV_FANSEG/Model.py index 5ccb320..c095b27 100644 --- a/models/Model_DEV_FANSEG/Model.py +++ b/models/Model_DEV_FANSEG/Model.py @@ -1,102 +1,102 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from facelib import FANSegmentator -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_sort_by_yaw=False, - ask_random_flip=False, - ask_src_scale_mod=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? 
(h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4} ) - - self.resolution = 256 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - - self.fan_seg = FANSegmentator(self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True), - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution, 'motion_blur':(25, 1) }, - { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_M), 'resolution': self.resolution }, - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True ), - output_sample_types=[ { 'types': (t.IMG_TRANSFORMED , face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution}, - ]) - ]) - - #override - def onSave(self): - self.fan_seg.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_src, target_src_mask = generators_samples[0] - - loss = self.fan_seg.train_on_batch( [target_src], [target_src_mask] ) - - return ( ('loss', loss), ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][0][0:4] #first 4 samples - test_B = sample[1][0][0:4] #first 4 samples - - mAA = self.fan_seg.extract(test_A) - mBB = self.fan_seg.extract(test_B) - - mAA = np.repeat ( mAA, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - mAA[i], - test_A[i,:,:,0:3]*mAA[i], - ), axis=1) ) - - st2 = [] - for i in range(0, len(test_B)): - st2.append ( np.concatenate ( ( - test_B[i,:,:,0:3], - mBB[i], - test_B[i,:,:,0:3]*mBB[i], - ), axis=1) ) - - return [ ('training data', np.concatenate ( st, axis=0 ) ), - ('evaluating data', np.concatenate ( st2, axis=0 ) ), - ] +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from facelib import FANSegmentator +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, + ask_enable_autobackup=False, + ask_write_preview_history=False, + ask_target_iter=False, + ask_sort_by_yaw=False, + ask_random_flip=False, + ask_src_scale_mod=False) + + #override + def onInitializeOptions(self, is_first_run, ask_override): + default_face_type = 'f' + if is_first_run: + self.options['face_type'] = io.input_str ("Half or Full face? 
(h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="").lower() + else: + self.options['face_type'] = self.options.get('face_type', default_face_type) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {1.5:4} ) + + self.resolution = 256 + self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + + self.fan_seg = FANSegmentator(self.resolution, + FaceType.toString(self.face_type), + load_weights=not self.is_first_run(), + weights_file_root=self.get_model_root_path(), + training=True) + + if self.is_training_mode: + t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=True), + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution, 'motion_blur':(25, 1) }, + { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_M), 'resolution': self.resolution }, + ]), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=True ), + output_sample_types=[ { 'types': (t.IMG_TRANSFORMED , face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution}, + ]) + ]) + + #override + def onSave(self): + self.fan_seg.save_weights() + + #override + def onTrainOneIter(self, generators_samples, generators_list): + target_src, target_src_mask = generators_samples[0] + + loss = self.fan_seg.train_on_batch( [target_src], [target_src_mask] ) + + return ( ('loss', loss), ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][0][0:4] #first 4 samples + test_B = sample[1][0][0:4] #first 4 samples + + mAA = self.fan_seg.extract(test_A) + mBB = self.fan_seg.extract(test_B) + + mAA = np.repeat ( mAA, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + mAA[i], + test_A[i,:,:,0:3]*mAA[i], + ), axis=1) ) + + st2 = [] + for i in range(0, len(test_B)): + st2.append ( np.concatenate ( ( + test_B[i,:,:,0:3], + mBB[i], + test_B[i,:,:,0:3]*mBB[i], + ), axis=1) ) + + return [ ('training data', np.concatenate ( st, axis=0 ) ), + ('evaluating data', np.concatenate ( st2, axis=0 ) ), + ] diff --git a/models/Model_DEV_FANSEG/__init__.py b/models/Model_DEV_FANSEG/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_DEV_FANSEG/__init__.py +++ b/models/Model_DEV_FANSEG/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_DEV_POSEEST/Model.py b/models/Model_DEV_POSEEST/Model.py index ee2b18f..cf8bb7f 100644 --- a/models/Model_DEV_POSEEST/Model.py +++ b/models/Model_DEV_POSEEST/Model.py @@ -1,122 +1,122 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from facelib import PoseEstimator -from samplelib import * -from interact import interact as io -import imagelib - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_sort_by_yaw=False, - ask_random_flip=False, - ask_src_scale_mod=False) - - #override - def 
onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - def_train_bgr = self.options.get('train_bgr', True) - if is_first_run or ask_override: - self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr) - else: - self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4:64} ) - - self.resolution = 128 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - - self.pose_est = PoseEstimator(self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) }, - {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]) - ]) - - #override - def onSave(self): - self.pose_est.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_srcw, target_src, pitch_yaw_roll = generators_samples[0] - - bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] ) - - return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), ) - - #override - def onGetPreview(self, generators_samples): - test_src = generators_samples[0][1][0:4] #first 4 samples - test_pyr_src = generators_samples[0][2][0:4] - test_dst = generators_samples[1][0][0:4] - test_pyr_dst = generators_samples[1][1][0:4] - - h,w,c = self.resolution,self.resolution,3 - h_line = 13 - - result = [] - for name, img, pyr in [ ['training data', test_src, test_pyr_src], \ - ['evaluating data',test_dst, test_pyr_dst] ]: - bgr_pred, pyr_pred = self.pose_est.extract(img) - - hor_imgs = [] - for i in range(len(img)): - img_info = np.ones ( (h,w,c) ) * 0.1 - - i_pyr = pyr[i] - i_pyr_pred = pyr_pred[i] - lines = ["%.4f %.4f %.4f" % (i_pyr[0],i_pyr[1],i_pyr[2]), - "%.4f %.4f %.4f" % (i_pyr_pred[0],i_pyr_pred[1],i_pyr_pred[2]) ] - - lines_count = len(lines) - for ln in range(lines_count): - img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \ - imagelib.get_text_image ( 
(h_line,w,c), lines[ln], color=[0.8]*c ) - - hor_imgs.append ( np.concatenate ( ( - img[i,:,:,0:3], - bgr_pred[i], - img_info - ), axis=1) ) - - - result += [ (name, np.concatenate (hor_imgs, axis=0)) ] - +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from facelib import PoseEstimator +from samplelib import * +from interact import interact as io +import imagelib + +class Model(ModelBase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, + ask_enable_autobackup=False, + ask_write_preview_history=False, + ask_target_iter=False, + ask_sort_by_yaw=False, + ask_random_flip=False, + ask_src_scale_mod=False) + + #override + def onInitializeOptions(self, is_first_run, ask_override): + yn_str = {True:'y',False:'n'} + + default_face_type = 'f' + if is_first_run: + self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() + else: + self.options['face_type'] = self.options.get('face_type', default_face_type) + + def_train_bgr = self.options.get('train_bgr', True) + if is_first_run or ask_override: + self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr) + else: + self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {4:64} ) + + self.resolution = 128 + self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + + self.pose_est = PoseEstimator(self.resolution, + FaceType.toString(self.face_type), + load_weights=not self.is_first_run(), + weights_file_root=self.get_model_root_path(), + training=True) + + if self.is_training_mode: + t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, + sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, + output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) }, + {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution }, + {'types': (t.IMG_PITCH_YAW_ROLL,)} + ]), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, + sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, + output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution }, + {'types': (t.IMG_PITCH_YAW_ROLL,)} + ]) + ]) + + #override + def onSave(self): + self.pose_est.save_weights() + + #override + def onTrainOneIter(self, generators_samples, generators_list): + target_srcw, target_src, pitch_yaw_roll = generators_samples[0] + + bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] ) + + return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), ) + + #override + def onGetPreview(self, generators_samples): + test_src = generators_samples[0][1][0:4] #first 4 samples + test_pyr_src = generators_samples[0][2][0:4] + test_dst = 
generators_samples[1][0][0:4] + test_pyr_dst = generators_samples[1][1][0:4] + + h,w,c = self.resolution,self.resolution,3 + h_line = 13 + + result = [] + for name, img, pyr in [ ['training data', test_src, test_pyr_src], \ + ['evaluating data',test_dst, test_pyr_dst] ]: + bgr_pred, pyr_pred = self.pose_est.extract(img) + + hor_imgs = [] + for i in range(len(img)): + img_info = np.ones ( (h,w,c) ) * 0.1 + + i_pyr = pyr[i] + i_pyr_pred = pyr_pred[i] + lines = ["%.4f %.4f %.4f" % (i_pyr[0],i_pyr[1],i_pyr[2]), + "%.4f %.4f %.4f" % (i_pyr_pred[0],i_pyr_pred[1],i_pyr_pred[2]) ] + + lines_count = len(lines) + for ln in range(lines_count): + img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \ + imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c ) + + hor_imgs.append ( np.concatenate ( ( + img[i,:,:,0:3], + bgr_pred[i], + img_info + ), axis=1) ) + + + result += [ (name, np.concatenate (hor_imgs, axis=0)) ] + return result \ No newline at end of file diff --git a/models/Model_DEV_POSEEST/__init__.py b/models/Model_DEV_POSEEST/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_DEV_POSEEST/__init__.py +++ b/models/Model_DEV_POSEEST/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_DF/Model.py b/models/Model_DF/Model.py index fb98418..92d5cbe 100644 --- a/models/Model_DF/Model.py +++ b/models/Model_DF/Model.py @@ -1,171 +1,171 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder_src, self.decoder_dst = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - rec_src = self.decoder_src(self.encoder(ae_input_layer)) - rec_dst = self.decoder_dst(self.encoder(ae_input_layer)) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer], rec_src) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in 
range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('DF', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.convert ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=128, - face_type=FaceType.FULL, - base_erode_mask_modifier=30, - base_blur_mask_modifier=0) - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_layer): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return Encoder(input_layer), Decoder(), Decoder() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {4.5:4} ) + + ae_input_layer = Input(shape=(128, 128, 3)) + mask_layer = Input(shape=(128, 128, 1)) #same as output + + self.encoder, self.decoder_src, self.decoder_dst = self.Build(ae_input_layer) + + if not self.is_first_run(): + weights_to_load = [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + self.load_weights_safe(weights_to_load) + + rec_src = self.decoder_src(self.encoder(ae_input_layer)) + rec_dst = self.decoder_dst(self.encoder(ae_input_layer)) + self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src) + self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst) + + self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + + self.convert = K.function([ae_input_layer], rec_src) + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in 
range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('DF', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.convert ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + face_type=FaceType.FULL, + base_erode_mask_modifier=30, + base_blur_mask_modifier=0) + + def Build(self, input_layer): + exec(nnlib.code_import_all, locals(), globals()) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(input_layer): + x = input_layer + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + + x = Dense(512)(Flatten()(x)) + x = Dense(8 * 8 * 512)(x) + x = Reshape((8, 8, 512))(x) + x = upscale(512)(x) + + return Model(input_layer, x) + + def Decoder(): + input_ = Input(shape=(16, 16, 512)) + x = input_ + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + + y = input_ #mask decoder + y = upscale(512)(y) + y = upscale(256)(y) + y = upscale(128)(y) + + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + return Model(input_, [x,y]) + + return Encoder(input_layer), Decoder(), Decoder() diff --git a/models/Model_DF/__init__.py b/models/Model_DF/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_DF/__init__.py +++ b/models/Model_DF/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_H128/Model.py b/models/Model_H128/Model.py index 903014b..a50b930 100644 --- a/models/Model_H128/Model.py +++ b/models/Model_H128/Model.py @@ -1,205 +1,205 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {2.5:4} ) - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] ) - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), - loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types ), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types ) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_mask, warped_dst, target_dst_mask], [target_src, target_src_mask, target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) 
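The three masks above are single-channel arrays of shape (H, W, 1); np.repeat tiles the last axis to three channels so each mask can sit next to the BGR tiles in the preview. A minimal numpy sketch of that step, using an illustrative dummy array that is not part of this patch:

    import numpy as np

    mask = np.zeros((128, 128, 1), dtype=np.float32)  # single-channel mask, as the decoder outputs it
    mask3 = np.repeat(mask, (3,), axis=-1)            # tile last axis to (128, 128, 3) for display
    assert mask3.shape == (128, 128, 3)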
- - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=128, - face_type=FaceType.HALF, - base_erode_mask_modifier=100, - base_blur_mask_modifier=100) - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (128, 128, 3) - mask_shape = (128, 128, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(256)(Flatten()(x)) - x = Dense(8 * 8 * 256)(x) - x = Reshape((8, 8, 256))(x) - x = upscale(256)(x) - - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - else: - input_ = Input(shape=(16, 16, 256)) - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run: + self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.") + else: + default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future + if 'created_vram_gb' in self.options.keys(): + self.options.pop ('created_vram_gb') + self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) + + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {2.5:4} ) + + bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] ) + if not self.is_first_run(): + weights_to_load = [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + self.load_weights_safe(weights_to_load) + + input_src_bgr = Input(bgr_shape) + input_src_mask = Input(mask_shape) + input_dst_bgr = Input(bgr_shape) + input_dst_mask = Input(mask_shape) + + rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) + rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) + + self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) + + self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), + loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) + + self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) + self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':128} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types ), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types ) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_mask, warped_dst, target_dst_mask], [target_src, target_src_mask, target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.src_view([test_A]) + AB, mAB = self.src_view([test_B]) + BB, mBB = self.dst_view([test_B]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) 
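The loop below assembles the preview montage: for each of the four samples it concatenates the input face, its reconstruction, and the swapped result along the width (axis=1), and the per-sample rows are then stacked along the height (axis=0) into a single image. A hedged, self-contained sketch of the same layout logic, with dummy tiles standing in for the model outputs:

    import numpy as np

    H = W = 128
    tiles = [np.random.rand(H, W, 3).astype(np.float32) for _ in range(3)]  # e.g. input, recon, swap
    row = np.concatenate(tiles, axis=1)          # one sample row: (H, 3*W, 3)
    grid = np.concatenate([row, row], axis=0)    # rows stacked vertically: (2*H, 3*W, 3)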
+ + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('H128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.src_view ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + face_type=FaceType.HALF, + base_erode_mask_modifier=100, + base_blur_mask_modifier=100) + + def Build(self, lighter_ae): + exec(nnlib.code_import_all, locals(), globals()) + + bgr_shape = (128, 128, 3) + mask_shape = (128, 128, 1) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(input_shape): + input_layer = Input(input_shape) + x = input_layer + if not lighter_ae: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Dense(512)(Flatten()(x)) + x = Dense(8 * 8 * 512)(x) + x = Reshape((8, 8, 512))(x) + x = upscale(512)(x) + else: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Dense(256)(Flatten()(x)) + x = Dense(8 * 8 * 256)(x) + x = Reshape((8, 8, 256))(x) + x = upscale(256)(x) + + return Model(input_layer, x) + + def Decoder(): + if not lighter_ae: + input_ = Input(shape=(16, 16, 512)) + x = input_ + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + + y = input_ #mask decoder + y = upscale(512)(y) + y = upscale(256)(y) + y = upscale(128)(y) + else: + input_ = Input(shape=(16, 16, 256)) + x = input_ + x = upscale(256)(x) + x = upscale(128)(x) + x = upscale(64)(x) + + y = input_ #mask decoder + y = upscale(256)(y) + y = upscale(128)(y) + y = upscale(64)(y) + + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + + return Model(input_, [x,y]) + + return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H128/__init__.py b/models/Model_H128/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_H128/__init__.py +++ b/models/Model_H128/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_H64/Model.py b/models/Model_H64/Model.py index 76fa5ff..f07bd05 100644 --- a/models/Model_H64/Model.py +++ b/models/Model_H64/Model.py @@ -1,202 +1,202 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. 
If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4} ) - - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae']) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':64} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_full_mask = sample[0] - warped_dst, target_dst, target_dst_full_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, 
loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], [target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H64', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=64, - face_type=FaceType.HALF, - base_erode_mask_modifier=100, - base_blur_mask_modifier=100) - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (64, 64, 3) - mask_shape = (64, 64, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(1024)(Flatten()(x)) - x = Dense(4 * 4 * 1024)(x) - x = Reshape((4, 4, 1024))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(768)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(4 * 4 * 512)(x) - x = Reshape((4, 4, 512))(x) - x = upscale(256)(x) - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(8, 8, 512)) - x = input_ - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - else: - input_ = Input(shape=(8, 8, 256)) - - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run: + self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. 
If your GPU VRAM <= 4, you should to choose this option.") + else: + default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future + if 'created_vram_gb' in self.options.keys(): + self.options.pop ('created_vram_gb') + self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) + + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {1.5:4} ) + + + bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae']) + + if not self.is_first_run(): + weights_to_load = [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + self.load_weights_safe(weights_to_load) + + input_src_bgr = Input(bgr_shape) + input_src_mask = Input(mask_shape) + input_dst_bgr = Input(bgr_shape) + input_dst_mask = Input(mask_shape) + + rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) + rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) + + self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) + + self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) + + self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) + self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':64} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_full_mask = sample[0] + warped_dst, target_dst, target_dst_full_mask = sample[1] + + total, loss_src_bgr, loss_src_mask, 
loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], [target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) + + return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.src_view([test_A]) + AB, mAB = self.src_view([test_B]) + BB, mBB = self.dst_view([test_B]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('H64', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.src_view ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=64, + face_type=FaceType.HALF, + base_erode_mask_modifier=100, + base_blur_mask_modifier=100) + + def Build(self, lighter_ae): + exec(nnlib.code_import_all, locals(), globals()) + + bgr_shape = (64, 64, 3) + mask_shape = (64, 64, 1) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(input_shape): + input_layer = Input(input_shape) + x = input_layer + if not lighter_ae: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Dense(1024)(Flatten()(x)) + x = Dense(4 * 4 * 1024)(x) + x = Reshape((4, 4, 1024))(x) + x = upscale(512)(x) + else: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(768)(x) + x = Dense(512)(Flatten()(x)) + x = Dense(4 * 4 * 512)(x) + x = Reshape((4, 4, 512))(x) + x = upscale(256)(x) + return Model(input_layer, x) + + def Decoder(): + if not lighter_ae: + input_ = Input(shape=(8, 8, 512)) + x = input_ + + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + + else: + input_ = Input(shape=(8, 8, 256)) + + x = input_ + x = upscale(256)(x) + x = upscale(128)(x) + x = upscale(64)(x) + + y = input_ #mask decoder + y = upscale(256)(y) + y = upscale(128)(y) + y = upscale(64)(y) + + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + return Model(input_, [x,y]) + + return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H64/__init__.py b/models/Model_H64/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_H64/__init__.py +++ b/models/Model_H64/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_LIAEF128/Model.py b/models/Model_LIAEF128/Model.py index 5fc03af..36f5658 100644 --- a/models/Model_LIAEF128/Model.py +++ b/models/Model_LIAEF128/Model.py @@ -1,180 +1,180 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def 
onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5'] - ] - self.load_weights_safe(weights_to_load) - - code = self.encoder(ae_input_layer) - AB = self.inter_AB(code) - B = self.inter_B(code) - rec_src = self.decoder(Concatenate()([AB, AB])) - rec_dst = self.decoder(Concatenate()([B, AB])) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src ) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst ) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer],rec_src) - - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = 
sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.convert ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=128, - face_type=FaceType.FULL, - base_erode_mask_modifier=30, - base_blur_mask_modifier=0) - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Flatten()(x) - return Model(input_layer, x) - - def Intermediate(): - input_layer = Input(shape=(None, 8 * 8 * 1024)) - x = input_layer - x = Dense(256)(x) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 1024)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) - - return Model(input_, [x,y]) - - return Encoder(), Decoder(), Intermediate(), Intermediate() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {4.5:4} ) + + ae_input_layer = Input(shape=(128, 128, 3)) + mask_layer = Input(shape=(128, 128, 1)) #same as output + + self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) + + if not self.is_first_run(): + weights_to_load = [ [self.encoder, 'encoder.h5'], + [self.decoder, 'decoder.h5'], + [self.inter_B, 'inter_B.h5'], + [self.inter_AB, 'inter_AB.h5'] + ] + self.load_weights_safe(weights_to_load) + + code = self.encoder(ae_input_layer) + AB = self.inter_AB(code) + B = self.inter_B(code) + rec_src = self.decoder(Concatenate()([AB, AB])) + rec_dst = self.decoder(Concatenate()([B, AB])) + self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src ) + self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst ) + + self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + + self.convert = K.function([ae_input_layer],rec_src) + + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder, 'decoder.h5'], + [self.inter_B, 'inter_B.h5'], + [self.inter_AB, 'inter_AB.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat 
( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.convert ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + face_type=FaceType.FULL, + base_erode_mask_modifier=30, + base_blur_mask_modifier=0) + + def Build(self, input_layer): + exec(nnlib.code_import_all, locals(), globals()) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(): + x = input_layer + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Flatten()(x) + return Model(input_layer, x) + + def Intermediate(): + input_layer = Input(shape=(None, 8 * 8 * 1024)) + x = input_layer + x = Dense(256)(x) + x = Dense(8 * 8 * 512)(x) + x = Reshape((8, 8, 512))(x) + x = upscale(512)(x) + return Model(input_layer, x) + + def Decoder(): + input_ = Input(shape=(16, 16, 1024)) + x = input_ + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + + y = input_ #mask decoder + y = upscale(512)(y) + y = upscale(256)(y) + y = upscale(128)(y) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) + + return Model(input_, [x,y]) + + return Encoder(), Decoder(), Intermediate(), Intermediate() diff --git a/models/Model_LIAEF128/__init__.py b/models/Model_LIAEF128/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_LIAEF128/__init__.py +++ b/models/Model_LIAEF128/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_RecycleGAN/Model.py b/models/Model_RecycleGAN/Model.py index 6f9d1c8..b78aece 100644 --- a/models/Model_RecycleGAN/Model.py +++ b/models/Model_RecycleGAN/Model.py @@ -1,483 +1,483 @@ -from functools import partial - -import cv2 -import numpy as np - -from facelib import FaceType -from interact import interact as io -from mathlib import get_power_of_two -from models import ModelBase -from nnlib import nnlib -from samplelib import * - -class RecycleGANModel(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_sort_by_yaw=False, - ask_random_flip=False, - ask_src_scale_mod=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['resolution'] = io.input_int("Resolution ( 128,256 ?:help skip:128) : ", 128, [128,256], help_message="More resolution requires more VRAM and time to train. 
Value will be adjusted to multiple of 16.") - else: - self.options['resolution'] = self.options.get('resolution', 128) - - #override - def onInitialize(self, batch_size=-1, **in_options): - exec(nnlib.code_import_all, locals(), globals()) - self.set_vram_batch_requirements({6:16}) - - resolution = self.options['resolution'] - bgr_shape = (resolution, resolution, 3) - ngf = 64 - npf = 32 - ndf = 64 - lambda_A = 10 - lambda_B = 10 - - use_batch_norm = True #created_batch_size > 1 - self.GA = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - self.GB = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - - #self.GA = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - #self.GB = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - - self.PA = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) - self.PB = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) - - self.DA = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) - self.DB = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) - - if not self.is_first_run(): - weights_to_load = [ - (self.GA, 'GA.h5'), - (self.DA, 'DA.h5'), - (self.PA, 'PA.h5'), - (self.GB, 'GB.h5'), - (self.DB, 'DB.h5'), - (self.PB, 'PB.h5'), - ] - self.load_weights_safe(weights_to_load) - - real_A0 = Input(bgr_shape, name="real_A0") - real_A1 = Input(bgr_shape, name="real_A1") - real_A2 = Input(bgr_shape, name="real_A2") - - real_B0 = Input(bgr_shape, name="real_B0") - real_B1 = Input(bgr_shape, name="real_B1") - real_B2 = Input(bgr_shape, name="real_B2") - - DA_ones = K.ones_like ( K.shape(self.DA.outputs[0]) ) - DA_zeros = K.zeros_like ( K.shape(self.DA.outputs[0] )) - DB_ones = K.ones_like ( K.shape(self.DB.outputs[0] )) - DB_zeros = K.zeros_like ( K.shape(self.DB.outputs[0] )) - - def DLoss(labels,logits): - return K.mean(K.binary_crossentropy(labels,logits)) - - def CycleLoss (t1,t2): - return K.mean(K.abs(t1 - t2)) - - def RecurrentLOSS(t1,t2): - return K.mean(K.abs(t1 - t2)) - - def RecycleLOSS(t1,t2): - return K.mean(K.abs(t1 - t2)) - - fake_B0 = self.GA(real_A0) - fake_B1 = self.GA(real_A1) - - fake_A0 = self.GB(real_B0) - fake_A1 = self.GB(real_B1) - - real_A0_d = self.DA(real_A0) - real_A0_d_ones = K.ones_like(real_A0_d) - real_A1_d = self.DA(real_A1) - real_A1_d_ones = K.ones_like(real_A1_d) - - fake_A0_d = self.DA(fake_A0) - fake_A0_d_ones = K.ones_like(fake_A0_d) - fake_A0_d_zeros = K.zeros_like(fake_A0_d) - - fake_A1_d = self.DA(fake_A1) - fake_A1_d_ones = K.ones_like(fake_A1_d) - fake_A1_d_zeros = K.zeros_like(fake_A1_d) - - real_B0_d = self.DB(real_B0) - real_B0_d_ones = K.ones_like(real_B0_d) - - real_B1_d = self.DB(real_B1) - real_B1_d_ones = K.ones_like(real_B1_d) - - fake_B0_d = self.DB(fake_B0) - fake_B0_d_ones = K.ones_like(fake_B0_d) - fake_B0_d_zeros = K.zeros_like(fake_B0_d) - - fake_B1_d = self.DB(fake_B1) - fake_B1_d_ones = K.ones_like(fake_B1_d) - fake_B1_d_zeros = K.zeros_like(fake_B1_d) - - pred_A2 = self.PA ( [real_A0, real_A1]) - pred_B2 = self.PB ( [real_B0, real_B1]) - rec_A2 = self.GB ( self.PB ( [fake_B0, fake_B1]) ) - rec_B2 = self.GA ( 
self.PA ( [fake_A0, fake_A1])) - - - loss_GA = DLoss(fake_B0_d_ones, fake_B0_d ) + \ - DLoss(fake_B1_d_ones, fake_B1_d ) + \ - lambda_A * (RecurrentLOSS(pred_A2, real_A2) + \ - RecycleLOSS(rec_B2, real_B2) ) - - - weights_GA = self.GA.trainable_weights + self.PA.trainable_weights - - loss_GB = DLoss(fake_A0_d_ones, fake_A0_d ) + \ - DLoss(fake_A1_d_ones, fake_A1_d ) + \ - lambda_B * (RecurrentLOSS(pred_B2, real_B2) + \ - RecycleLOSS(rec_A2, real_A2) ) - - weights_GB = self.GB.trainable_weights + self.PB.trainable_weights - - def opt(): - return Adam(lr=2e-4, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2)#, clipnorm=1) - - self.GA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GA], - opt().get_updates(loss_GA, weights_GA) ) - - self.GB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GB], - opt().get_updates(loss_GB, weights_GB) ) - - ########### - - loss_D_A0 = ( DLoss(real_A0_d_ones, real_A0_d ) + \ - DLoss(fake_A0_d_zeros, fake_A0_d ) ) * 0.5 - - loss_D_A1 = ( DLoss(real_A1_d_ones, real_A1_d ) + \ - DLoss(fake_A1_d_zeros, fake_A1_d ) ) * 0.5 - - loss_D_A = loss_D_A0 + loss_D_A1 - - self.DA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_A], - opt().get_updates(loss_D_A, self.DA.trainable_weights) ) - - ############ - - loss_D_B0 = ( DLoss(real_B0_d_ones, real_B0_d ) + \ - DLoss(fake_B0_d_zeros, fake_B0_d ) ) * 0.5 - - loss_D_B1 = ( DLoss(real_B1_d_ones, real_B1_d ) + \ - DLoss(fake_B1_d_zeros, fake_B1_d ) ) * 0.5 - - loss_D_B = loss_D_B0 + loss_D_B1 - - self.DB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_B], - opt().get_updates(loss_D_B, self.DB.trainable_weights) ) - - ############ - - - self.G_view = K.function([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 ]) - - - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_SOURCE, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh' : True} ] - - self.set_training_data_generators ([ - SampleGeneratorImageTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip = False), - output_sample_types=output_sample_types ), - - SampleGeneratorImageTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip = False), - output_sample_types=output_sample_types ), - ]) - else: - self.G_convert = K.function([real_B0],[fake_A0]) - - #override - def get_model_filename_list(self): - return [ [self.GA, 'GA.h5'], - [self.GB, 'GB.h5'], - [self.DA, 'DA.h5'], - [self.DB, 'DB.h5'], - [self.PA, 'PA.h5'], - [self.PB, 'PB.h5'] ] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - source_src_0, source_src_1, source_src_2, = generators_samples[0] - source_dst_0, source_dst_1, source_dst_2, = generators_samples[1] - - feed = [source_src_0, source_src_1, source_src_2, source_dst_0, source_dst_1, source_dst_2] - - loss_GA, = self.GA_train ( feed ) - loss_GB, = self.GB_train ( feed ) - loss_DA, = self.DA_train( feed ) - loss_DB, = self.DB_train( feed ) - - return ( ('GA', loss_GA), ('GB', loss_GB), ('DA', loss_DA), ('DB', loss_DB) ) - - #override - def 
onGetPreview(self, sample): - test_A0 = sample[0][0] - test_A1 = sample[0][1] - test_A2 = sample[0][2] - - test_B0 = sample[1][0] - test_B1 = sample[1][1] - test_B2 = sample[1][2] - - G_view_result = self.G_view([test_A0, test_A1, test_A2, test_B0, test_B1, test_B2]) - - fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 = [ x[0] / 2 + 0.5 for x in G_view_result] - test_A0, test_A1, test_A2, test_B0, test_B1, test_B2 = [ x[0] / 2 + 0.5 for x in [test_A0, test_A1, test_A2, test_B0, test_B1, test_B2] ] - - r = np.concatenate ((np.concatenate ( (test_A0, test_A1, test_A2, pred_A2, fake_B0, fake_B1, rec_A2), axis=1), - np.concatenate ( (test_B0, test_B1, test_B2, pred_B2, fake_A0, fake_A1, rec_B2), axis=1) - ), axis=0) - - return [ ('RecycleGAN', r ) ] - - def predictor_func (self, face): - x = self.G_convert ( [ face[np.newaxis,...]*2-1 ] )[0] - return np.clip ( x[0] / 2 + 0.5 , 0, 1) - - #override - def get_converter(self, **in_options): - from converters import ConverterImage - return ConverterImage(self.predictor_func, - predictor_input_size=self.options['resolution'], - **in_options) - - @staticmethod - def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=-1)(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=-1)(x) - - XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) - XConv2DTranspose = partial(Conv2DTranspose, padding='same', use_bias=use_bias) - - def func(input): - - - def ResnetBlock(dim, use_dropout=False): - def func(input): - x = input - - x = XConv2D(dim, 3, strides=1)(x) - x = XNormalization(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = XConv2D(dim, 3, strides=1)(x) - x = XNormalization(x) - x = ReLU()(x) - return Add()([x,input]) - return func - - x = input - - x = ReLU()(XNormalization(XConv2D(ngf, 7, strides=1)(x))) - - x = ReLU()(XNormalization(XConv2D(ngf*2, 3, strides=2)(x))) - x = ReLU()(XNormalization(XConv2D(ngf*4, 3, strides=2)(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4, use_dropout=use_dropout)(x) - - x = ReLU()(XNormalization(XConv2DTranspose(ngf*2, 3, strides=2)(x))) - x = ReLU()(XNormalization(XConv2DTranspose(ngf , 3, strides=2)(x))) - - x = XConv2D(output_nc, 7, strides=1, activation='tanh', use_bias=True)(x) - - return x - - return func - - @staticmethod - def UNet(output_nc, use_batch_norm, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalizationL(): - return InstanceNormalization (axis=-1) - else: - use_bias = False - def XNormalizationL(): - return BatchNormalization (axis=-1) - - def XNormalization(x): - return XNormalizationL()(x) - - XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) - XConv2DTranspose = partial(Conv2DTranspose, padding='same', use_bias=use_bias) - - def func(input): - - b,h,w,c = K.int_shape(input) - - n_downs = get_power_of_two(w) - 4 - - Norm = XNormalizationL() - Norm2 = XNormalizationL() - Norm4 = XNormalizationL() - Norm8 = XNormalizationL() - - x = input - - x = e1 = XConv2D( ngf, 4, strides=2, use_bias=True ) (x) - - x = e2 = Norm2( XConv2D( ngf*2, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) - x = e3 = Norm4( XConv2D( ngf*4, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) - - l = [] - for i in range(n_downs): - x = Norm8( XConv2D( ngf*8, 4, strides=2 )( 
LeakyReLU(0.2)(x) ) ) - l += [x] - - x = XConv2D( ngf*8, 4, strides=2, use_bias=True )( LeakyReLU(0.2)(x) ) - - for i in range(n_downs): - x = Norm8( XConv2DTranspose( ngf*8, 4, strides=2 )( ReLU()(x) ) ) - if i <= n_downs-2: - x = Dropout(0.5)(x) - x = Concatenate(axis=-1)([x, l[-i-1] ]) - - x = Norm4( XConv2DTranspose( ngf*4, 4, strides=2 )( ReLU()(x) ) ) - x = Concatenate(axis=-1)([x, e3]) - - x = Norm2( XConv2DTranspose( ngf*2, 4, strides=2 )( ReLU()(x) ) ) - x = Concatenate(axis=-1)([x, e2]) - - x = Norm( XConv2DTranspose( ngf, 4, strides=2 )( ReLU()(x) ) ) - x = Concatenate(axis=-1)([x, e1]) - - x = XConv2DTranspose(output_nc, 4, strides=2, activation='tanh', use_bias=True)( ReLU()(x) ) - - return x - return func - nnlib.UNet = UNet - - @staticmethod - def UNetTemporalPredictor(output_nc, use_batch_norm, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - def func(inputs): - past_2_image_tensor, past_1_image_tensor = inputs - - x = Concatenate(axis=-1)([ past_2_image_tensor, past_1_image_tensor ]) - x = UNet(3, use_batch_norm, ngf=ngf, use_dropout=use_dropout) (x) - - return x - - return func - - @staticmethod - def PatchDiscriminator(ndf=64): - exec (nnlib.import_all(), locals(), globals()) - - #use_bias = True - #def XNormalization(x): - # return InstanceNormalization (axis=-1)(x) - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=-1)(x) - - XConv2D = partial(Conv2D, use_bias=use_bias) - - def func(input): - b,h,w,c = K.int_shape(input) - - x = input - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf, 4, strides=2, padding='valid', use_bias=True)(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*2, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*4, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# - return func - - @staticmethod - def NLayerDiscriminator(ndf=64, n_layers=3): - exec (nnlib.import_all(), locals(), globals()) - - #use_bias = True - #def XNormalization(x): - # return InstanceNormalization (axis=-1)(x) - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=-1)(x) - - XConv2D = partial(Conv2D, use_bias=use_bias) - - def func(input): - b,h,w,c = K.int_shape(input) - - x = input - - f = ndf - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( f, 4, strides=2, padding='valid', use_bias=True)(x) - f = min( ndf*8, f*2 ) - x = LeakyReLU(0.2)(x) - - for i in range(n_layers): - x = ZeroPadding2D((1,1))(x) - x = XConv2D( f, 4, strides=2, padding='valid')(x) - f = min( ndf*8, f*2 ) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( f, 4, strides=1, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# - return func - -Model = RecycleGANModel +from functools import partial + +import cv2 +import numpy as np + +from facelib import FaceType +from interact import interact as io +from mathlib import 
get_power_of_two +from models import ModelBase +from nnlib import nnlib +from samplelib import * + +class RecycleGANModel(ModelBase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, + ask_sort_by_yaw=False, + ask_random_flip=False, + ask_src_scale_mod=False) + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run: + self.options['resolution'] = io.input_int("Resolution ( 128,256 ?:help skip:128) : ", 128, [128,256], help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") + else: + self.options['resolution'] = self.options.get('resolution', 128) + + #override + def onInitialize(self, batch_size=-1, **in_options): + exec(nnlib.code_import_all, locals(), globals()) + self.set_vram_batch_requirements({6:16}) + + resolution = self.options['resolution'] + bgr_shape = (resolution, resolution, 3) + ngf = 64 + npf = 32 + ndf = 64 + lambda_A = 10 + lambda_B = 10 + + use_batch_norm = True #created_batch_size > 1 + self.GA = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + self.GB = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + + #self.GA = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + #self.GB = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + + self.PA = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) + self.PB = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) + + self.DA = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) + self.DB = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) + + if not self.is_first_run(): + weights_to_load = [ + (self.GA, 'GA.h5'), + (self.DA, 'DA.h5'), + (self.PA, 'PA.h5'), + (self.GB, 'GB.h5'), + (self.DB, 'DB.h5'), + (self.PB, 'PB.h5'), + ] + self.load_weights_safe(weights_to_load) + + real_A0 = Input(bgr_shape, name="real_A0") + real_A1 = Input(bgr_shape, name="real_A1") + real_A2 = Input(bgr_shape, name="real_A2") + + real_B0 = Input(bgr_shape, name="real_B0") + real_B1 = Input(bgr_shape, name="real_B1") + real_B2 = Input(bgr_shape, name="real_B2") + + DA_ones = K.ones_like ( K.shape(self.DA.outputs[0]) ) + DA_zeros = K.zeros_like ( K.shape(self.DA.outputs[0] )) + DB_ones = K.ones_like ( K.shape(self.DB.outputs[0] )) + DB_zeros = K.zeros_like ( K.shape(self.DB.outputs[0] )) + + def DLoss(labels,logits): + return K.mean(K.binary_crossentropy(labels,logits)) + + def CycleLoss (t1,t2): + return K.mean(K.abs(t1 - t2)) + + def RecurrentLOSS(t1,t2): + return K.mean(K.abs(t1 - t2)) + + def RecycleLOSS(t1,t2): + return K.mean(K.abs(t1 - t2)) + + fake_B0 = self.GA(real_A0) + fake_B1 = self.GA(real_A1) + + fake_A0 = self.GB(real_B0) + fake_A1 = self.GB(real_B1) + + real_A0_d = self.DA(real_A0) + real_A0_d_ones = K.ones_like(real_A0_d) + real_A1_d = self.DA(real_A1) + real_A1_d_ones = K.ones_like(real_A1_d) + + fake_A0_d = self.DA(fake_A0) + fake_A0_d_ones = K.ones_like(fake_A0_d) + fake_A0_d_zeros = K.zeros_like(fake_A0_d) + + fake_A1_d = self.DA(fake_A1) + fake_A1_d_ones = K.ones_like(fake_A1_d) + fake_A1_d_zeros = K.zeros_like(fake_A1_d) 
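+        # Added note (a sketch of what the surrounding code already does, not
+        # new behaviour): DA/DB are PatchGAN discriminators, so d(x) is an NxN
+        # grid of per-patch probabilities rather than a single scalar. The
+        # *_d_ones / *_d_zeros tensors built here with K.ones_like/K.zeros_like
+        # are matching per-patch label grids: the discriminators are trained
+        # toward ones on real frames and zeros on fakes, while the generators
+        # are trained toward ones on their own fakes, all through the
+        # BCE-based DLoss defined above, roughly:
+        #   loss_D ~ 0.5 * ( DLoss(ones_like(d_real), d_real)
+        #                  + DLoss(zeros_like(d_fake), d_fake) )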
+ + real_B0_d = self.DB(real_B0) + real_B0_d_ones = K.ones_like(real_B0_d) + + real_B1_d = self.DB(real_B1) + real_B1_d_ones = K.ones_like(real_B1_d) + + fake_B0_d = self.DB(fake_B0) + fake_B0_d_ones = K.ones_like(fake_B0_d) + fake_B0_d_zeros = K.zeros_like(fake_B0_d) + + fake_B1_d = self.DB(fake_B1) + fake_B1_d_ones = K.ones_like(fake_B1_d) + fake_B1_d_zeros = K.zeros_like(fake_B1_d) + + pred_A2 = self.PA ( [real_A0, real_A1]) + pred_B2 = self.PB ( [real_B0, real_B1]) + rec_A2 = self.GB ( self.PB ( [fake_B0, fake_B1]) ) + rec_B2 = self.GA ( self.PA ( [fake_A0, fake_A1])) + + + loss_GA = DLoss(fake_B0_d_ones, fake_B0_d ) + \ + DLoss(fake_B1_d_ones, fake_B1_d ) + \ + lambda_A * (RecurrentLOSS(pred_A2, real_A2) + \ + RecycleLOSS(rec_B2, real_B2) ) + + + weights_GA = self.GA.trainable_weights + self.PA.trainable_weights + + loss_GB = DLoss(fake_A0_d_ones, fake_A0_d ) + \ + DLoss(fake_A1_d_ones, fake_A1_d ) + \ + lambda_B * (RecurrentLOSS(pred_B2, real_B2) + \ + RecycleLOSS(rec_A2, real_A2) ) + + weights_GB = self.GB.trainable_weights + self.PB.trainable_weights + + def opt(): + return Adam(lr=2e-4, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2)#, clipnorm=1) + + self.GA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GA], + opt().get_updates(loss_GA, weights_GA) ) + + self.GB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GB], + opt().get_updates(loss_GB, weights_GB) ) + + ########### + + loss_D_A0 = ( DLoss(real_A0_d_ones, real_A0_d ) + \ + DLoss(fake_A0_d_zeros, fake_A0_d ) ) * 0.5 + + loss_D_A1 = ( DLoss(real_A1_d_ones, real_A1_d ) + \ + DLoss(fake_A1_d_zeros, fake_A1_d ) ) * 0.5 + + loss_D_A = loss_D_A0 + loss_D_A1 + + self.DA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_A], + opt().get_updates(loss_D_A, self.DA.trainable_weights) ) + + ############ + + loss_D_B0 = ( DLoss(real_B0_d_ones, real_B0_d ) + \ + DLoss(fake_B0_d_zeros, fake_B0_d ) ) * 0.5 + + loss_D_B1 = ( DLoss(real_B1_d_ones, real_B1_d ) + \ + DLoss(fake_B1_d_zeros, fake_B1_d ) ) * 0.5 + + loss_D_B = loss_D_B0 + loss_D_B1 + + self.DB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_B], + opt().get_updates(loss_D_B, self.DB.trainable_weights) ) + + ############ + + + self.G_view = K.function([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 ]) + + + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_SOURCE, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh' : True} ] + + self.set_training_data_generators ([ + SampleGeneratorImageTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + temporal_image_count=3, + sample_process_options=SampleProcessor.Options(random_flip = False), + output_sample_types=output_sample_types ), + + SampleGeneratorImageTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + temporal_image_count=3, + sample_process_options=SampleProcessor.Options(random_flip = False), + output_sample_types=output_sample_types ), + ]) + else: + self.G_convert = K.function([real_B0],[fake_A0]) + + #override + def get_model_filename_list(self): + return [ [self.GA, 'GA.h5'], + [self.GB, 'GB.h5'], + [self.DA, 'DA.h5'], + [self.DB, 'DB.h5'], + [self.PA, 'PA.h5'], + [self.PB, 'PB.h5'] ] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def 
onTrainOneIter(self, generators_samples, generators_list): + source_src_0, source_src_1, source_src_2, = generators_samples[0] + source_dst_0, source_dst_1, source_dst_2, = generators_samples[1] + + feed = [source_src_0, source_src_1, source_src_2, source_dst_0, source_dst_1, source_dst_2] + + loss_GA, = self.GA_train ( feed ) + loss_GB, = self.GB_train ( feed ) + loss_DA, = self.DA_train( feed ) + loss_DB, = self.DB_train( feed ) + + return ( ('GA', loss_GA), ('GB', loss_GB), ('DA', loss_DA), ('DB', loss_DB) ) + + #override + def onGetPreview(self, sample): + test_A0 = sample[0][0] + test_A1 = sample[0][1] + test_A2 = sample[0][2] + + test_B0 = sample[1][0] + test_B1 = sample[1][1] + test_B2 = sample[1][2] + + G_view_result = self.G_view([test_A0, test_A1, test_A2, test_B0, test_B1, test_B2]) + + fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 = [ x[0] / 2 + 0.5 for x in G_view_result] + test_A0, test_A1, test_A2, test_B0, test_B1, test_B2 = [ x[0] / 2 + 0.5 for x in [test_A0, test_A1, test_A2, test_B0, test_B1, test_B2] ] + + r = np.concatenate ((np.concatenate ( (test_A0, test_A1, test_A2, pred_A2, fake_B0, fake_B1, rec_A2), axis=1), + np.concatenate ( (test_B0, test_B1, test_B2, pred_B2, fake_A0, fake_A1, rec_B2), axis=1) + ), axis=0) + + return [ ('RecycleGAN', r ) ] + + def predictor_func (self, face): + x = self.G_convert ( [ face[np.newaxis,...]*2-1 ] )[0] + return np.clip ( x[0] / 2 + 0.5 , 0, 1) + + #override + def get_converter(self, **in_options): + from converters import ConverterImage + return ConverterImage(self.predictor_func, + predictor_input_size=self.options['resolution'], + **in_options) + + @staticmethod + def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=-1)(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=-1)(x) + + XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) + XConv2DTranspose = partial(Conv2DTranspose, padding='same', use_bias=use_bias) + + def func(input): + + + def ResnetBlock(dim, use_dropout=False): + def func(input): + x = input + + x = XConv2D(dim, 3, strides=1)(x) + x = XNormalization(x) + x = ReLU()(x) + + if use_dropout: + x = Dropout(0.5)(x) + + x = XConv2D(dim, 3, strides=1)(x) + x = XNormalization(x) + x = ReLU()(x) + return Add()([x,input]) + return func + + x = input + + x = ReLU()(XNormalization(XConv2D(ngf, 7, strides=1)(x))) + + x = ReLU()(XNormalization(XConv2D(ngf*2, 3, strides=2)(x))) + x = ReLU()(XNormalization(XConv2D(ngf*4, 3, strides=2)(x))) + + for i in range(n_blocks): + x = ResnetBlock(ngf*4, use_dropout=use_dropout)(x) + + x = ReLU()(XNormalization(XConv2DTranspose(ngf*2, 3, strides=2)(x))) + x = ReLU()(XNormalization(XConv2DTranspose(ngf , 3, strides=2)(x))) + + x = XConv2D(output_nc, 7, strides=1, activation='tanh', use_bias=True)(x) + + return x + + return func + + @staticmethod + def UNet(output_nc, use_batch_norm, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalizationL(): + return InstanceNormalization (axis=-1) + else: + use_bias = False + def XNormalizationL(): + return BatchNormalization (axis=-1) + + def XNormalization(x): + return XNormalizationL()(x) + + XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) + XConv2DTranspose = partial(Conv2DTranspose, padding='same', 
use_bias=use_bias) + + def func(input): + + b,h,w,c = K.int_shape(input) + + n_downs = get_power_of_two(w) - 4 + + Norm = XNormalizationL() + Norm2 = XNormalizationL() + Norm4 = XNormalizationL() + Norm8 = XNormalizationL() + + x = input + + x = e1 = XConv2D( ngf, 4, strides=2, use_bias=True ) (x) + + x = e2 = Norm2( XConv2D( ngf*2, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) + x = e3 = Norm4( XConv2D( ngf*4, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) + + l = [] + for i in range(n_downs): + x = Norm8( XConv2D( ngf*8, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) + l += [x] + + x = XConv2D( ngf*8, 4, strides=2, use_bias=True )( LeakyReLU(0.2)(x) ) + + for i in range(n_downs): + x = Norm8( XConv2DTranspose( ngf*8, 4, strides=2 )( ReLU()(x) ) ) + if i <= n_downs-2: + x = Dropout(0.5)(x) + x = Concatenate(axis=-1)([x, l[-i-1] ]) + + x = Norm4( XConv2DTranspose( ngf*4, 4, strides=2 )( ReLU()(x) ) ) + x = Concatenate(axis=-1)([x, e3]) + + x = Norm2( XConv2DTranspose( ngf*2, 4, strides=2 )( ReLU()(x) ) ) + x = Concatenate(axis=-1)([x, e2]) + + x = Norm( XConv2DTranspose( ngf, 4, strides=2 )( ReLU()(x) ) ) + x = Concatenate(axis=-1)([x, e1]) + + x = XConv2DTranspose(output_nc, 4, strides=2, activation='tanh', use_bias=True)( ReLU()(x) ) + + return x + return func + nnlib.UNet = UNet + + @staticmethod + def UNetTemporalPredictor(output_nc, use_batch_norm, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + def func(inputs): + past_2_image_tensor, past_1_image_tensor = inputs + + x = Concatenate(axis=-1)([ past_2_image_tensor, past_1_image_tensor ]) + x = UNet(3, use_batch_norm, ngf=ngf, use_dropout=use_dropout) (x) + + return x + + return func + + @staticmethod + def PatchDiscriminator(ndf=64): + exec (nnlib.import_all(), locals(), globals()) + + #use_bias = True + #def XNormalization(x): + # return InstanceNormalization (axis=-1)(x) + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=-1)(x) + + XConv2D = partial(Conv2D, use_bias=use_bias) + + def func(input): + b,h,w,c = K.int_shape(input) + + x = input + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf, 4, strides=2, padding='valid', use_bias=True)(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*2, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*4, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# + return func + + @staticmethod + def NLayerDiscriminator(ndf=64, n_layers=3): + exec (nnlib.import_all(), locals(), globals()) + + #use_bias = True + #def XNormalization(x): + # return InstanceNormalization (axis=-1)(x) + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=-1)(x) + + XConv2D = partial(Conv2D, use_bias=use_bias) + + def func(input): + b,h,w,c = K.int_shape(input) + + x = input + + f = ndf + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( f, 4, strides=2, padding='valid', use_bias=True)(x) + f = min( ndf*8, f*2 ) + x = LeakyReLU(0.2)(x) + + for i in range(n_layers): + x = ZeroPadding2D((1,1))(x) + x = XConv2D( f, 4, strides=2, padding='valid')(x) + f 
= min( ndf*8, f*2 ) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( f, 4, strides=1, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# + return func + +Model = RecycleGANModel diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py index f4fd2b6..80885f1 100644 --- a/models/Model_SAE/Model.py +++ b/models/Model_SAE/Model.py @@ -1,701 +1,701 @@ -from functools import partial -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -#SAE - Styled AutoEncoder -class SAEModel(ModelBase): - - encoderH5 = 'encoder.h5' - inter_BH5 = 'inter_B.h5' - inter_ABH5 = 'inter_AB.h5' - decoderH5 = 'decoder.h5' - decodermH5 = 'decoderm.h5' - - decoder_srcH5 = 'decoder_src.h5' - decoder_srcmH5 = 'decoder_srcm.h5' - decoder_dstH5 = 'decoder_dst.h5' - decoder_dstmH5 = 'decoder_dstm.h5' - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_resolution = 128 - default_archi = 'df' - default_face_type = 'f' - - if is_first_run: - resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") - resolution = np.clip (resolution, 64, 256) - while np.modf(resolution / 16)[0] != 0.0: - resolution -= 1 - self.options['resolution'] = resolution - - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.") - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - self.options['face_type'] = self.options.get('face_type', default_face_type) - self.options['learn_mask'] = self.options.get('learn_mask', True) - - - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) - - if is_first_run: - self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has decreased change to collapse. 
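+            # Added note, sketching what the 'archi' option controls
+            # (paraphrased from the model-building code later in this file):
+            #   'df'   - shared encoder with separate decoder_src/decoder_dst;
+            #            the swap comes from pushing the dst latent code
+            #            through decoder_src.
+            #   'liae' - a single shared decoder; identity is carried by the
+            #            inter_B / inter_AB latent blocks, and the swap decodes
+            #            Concatenate()([inter_AB(code), inter_AB(code)]).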
- else: - self.options['archi'] = self.options.get('archi', default_archi) - - default_ae_dims = 256 if 'liae' in self.options['archi'] else 512 - default_e_ch_dims = 42 - default_d_ch_dims = default_e_ch_dims // 2 - def_ca_weights = False - - if is_first_run: - self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune model size to fit your GPU." ), 32, 1024 ) - self.options['e_ch_dims'] = np.clip ( io.input_int("Encoder dims per channel (21-85 ?:help skip:%d) : " % (default_e_ch_dims) , default_e_ch_dims, help_message="More encoder dims help to recognize more facial features, but require more VRAM. You can fine-tune model size to fit your GPU." ), 21, 85 ) - default_d_ch_dims = self.options['e_ch_dims'] // 2 - self.options['d_ch_dims'] = np.clip ( io.input_int("Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_d_ch_dims) , default_d_ch_dims, help_message="More decoder dims help to get better details, but require more VRAM. You can fine-tune model size to fit your GPU." ), 10, 85 ) - self.options['multiscale_decoder'] = io.input_bool ("Use multiscale decoder? (y/n, ?:help skip:n) : ", False, help_message="Multiscale decoder helps to get better details.") - self.options['ca_weights'] = io.input_bool ("Use CA weights? (y/n, ?:help skip: %s ) : " % (yn_str[def_ca_weights]), def_ca_weights, help_message="Initialize network with 'Convolution Aware' weights. This may help to achieve a higher accuracy model, but consumes a time at first run.") - else: - self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) - self.options['e_ch_dims'] = self.options.get('e_ch_dims', default_e_ch_dims) - self.options['d_ch_dims'] = self.options.get('d_ch_dims', default_d_ch_dims) - self.options['multiscale_decoder'] = self.options.get('multiscale_decoder', False) - self.options['ca_weights'] = self.options.get('ca_weights', def_ca_weights) - - default_face_style_power = 0.0 - default_bg_style_power = 0.0 - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: %s ) : " % (yn_str[def_pixel_loss]), def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time. Enabling this option too early increases the chance of model collapse.") - - default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power) - self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, - help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power) - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 
100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, - help_message="Learn to transfer image around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_apply_random_ct = False if is_first_run else self.options.get('apply_random_ct', False) - self.options['apply_random_ct'] = io.input_bool ("Apply random color transfer to src faceset? (y/n, ?:help skip:%s) : " % (yn_str[default_apply_random_ct]), default_apply_random_ct, help_message="Increase variativity of src samples by apply LCT color transfer from random dst samples. It is like 'face_style' learning, but more precise color transfer and without risk of model collapse, also it does not require additional GPU resources, but the training time may be longer, due to the src faceset is becoming more diverse.") - - if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 - default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) - self.options['clipgrad'] = io.input_bool ("Enable gradient clipping? (y/n, ?:help skip:%s) : " % (yn_str[default_clipgrad]), default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") - else: - self.options['clipgrad'] = False - - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) - self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) - self.options['apply_random_ct'] = self.options.get('apply_random_ct', False) - self.options['clipgrad'] = self.options.get('clipgrad', False) - - if is_first_run: - self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. 
After that, save and run the training again.") - else: - self.options['pretrain'] = False - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - SAEModel.initialize_nn_functions() - self.set_vram_batch_requirements({1.5:4}) - - resolution = self.options['resolution'] - ae_dims = self.options['ae_dims'] - e_ch_dims = self.options['e_ch_dims'] - d_ch_dims = self.options['d_ch_dims'] - self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) - if not self.pretrain: - self.options.pop('pretrain') - - d_residual_blocks = True - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - - self.ms_count = ms_count = 3 if (self.options['multiscale_decoder']) else 1 - - apply_random_ct = self.options.get('apply_random_ct', False) - masked_training = True - - warped_src = Input(bgr_shape) - target_src = Input(bgr_shape) - target_srcm = Input(mask_shape) - - warped_dst = Input(bgr_shape) - target_dst = Input(bgr_shape) - target_dstm = Input(mask_shape) - - target_src_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - target_srcm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - target_dst_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - target_dstm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - - common_flow_kwargs = { 'padding': 'zero', - 'norm': '', - 'act':'' } - models_list = [] - weights_to_load = [] - if 'liae' in self.options['archi']: - self.encoder = modelify(SAEModel.LIAEEncFlow(resolution, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape)) - - enc_output_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] - - self.inter_B = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs) - self.inter_AB = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs) - - inter_output_Inputs = [ Input( np.array(K.int_shape(x)[1:])*(1,1,2) ) for x in self.inter_B.outputs ] - - self.decoder = modelify(SAEModel.LIAEDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs)) (inter_output_Inputs) - models_list += [self.encoder, self.inter_B, self.inter_AB, self.decoder] - - if self.options['learn_mask']: - self.decoderm = modelify(SAEModel.LIAEDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs)) (inter_output_Inputs) - models_list += [self.decoderm] - - if not self.is_first_run(): - weights_to_load += [ [self.encoder , 'encoder.h5'], - [self.inter_B , 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5'], - [self.decoder , 'decoder.h5'], - ] - if self.options['learn_mask']: - weights_to_load += [ [self.decoderm, 'decoderm.h5'] ] - - warped_src_code = self.encoder (warped_src) - warped_src_inter_AB_code = self.inter_AB (warped_src_code) - warped_src_inter_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) - - warped_dst_code = self.encoder (warped_dst) - warped_dst_inter_B_code = self.inter_B (warped_dst_code) - warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) - warped_dst_inter_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) - - warped_src_dst_inter_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) - - pred_src_src = 
self.decoder(warped_src_inter_code) - pred_dst_dst = self.decoder(warped_dst_inter_code) - pred_src_dst = self.decoder(warped_src_dst_inter_code) - - if self.options['learn_mask']: - pred_src_srcm = self.decoderm(warped_src_inter_code) - pred_dst_dstm = self.decoderm(warped_dst_inter_code) - pred_src_dstm = self.decoderm(warped_src_dst_inter_code) - - elif 'df' in self.options['archi']: - self.encoder = modelify(SAEModel.DFEncFlow(resolution, ae_dims=ae_dims, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape)) - - dec_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] - - self.decoder_src = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs) - self.decoder_dst = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs) - models_list += [self.encoder, self.decoder_src, self.decoder_dst] - - if self.options['learn_mask']: - self.decoder_srcm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs) - self.decoder_dstm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs) - models_list += [self.decoder_srcm, self.decoder_dstm] - - if not self.is_first_run(): - weights_to_load += [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - if self.options['learn_mask']: - weights_to_load += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'], - ] - - warped_src_code = self.encoder (warped_src) - warped_dst_code = self.encoder (warped_dst) - pred_src_src = self.decoder_src(warped_src_code) - pred_dst_dst = self.decoder_dst(warped_dst_code) - pred_src_dst = self.decoder_src(warped_dst_code) - - if self.options['learn_mask']: - pred_src_srcm = self.decoder_srcm(warped_src_code) - pred_dst_dstm = self.decoder_dstm(warped_dst_code) - pred_src_dstm = self.decoder_srcm(warped_dst_code) - - if self.is_first_run(): - if self.options.get('ca_weights',False): - conv_weights_list = [] - for model in models_list: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - CAInitializerMP ( conv_weights_list ) - else: - self.load_weights_safe(weights_to_load) - - pred_src_src, pred_dst_dst, pred_src_dst, = [ [x] if type(x) != list else x for x in [pred_src_src, pred_dst_dst, pred_src_dst, ] ] - - if self.options['learn_mask']: - pred_src_srcm, pred_dst_dstm, pred_src_dstm = [ [x] if type(x) != list else x for x in [pred_src_srcm, pred_dst_dstm, pred_src_dstm] ] - - target_srcm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_srcm_ar] - target_srcm_sigm_ar = target_srcm_blurred_ar #[ x / 2.0 + 0.5 for x in target_srcm_blurred_ar] - target_srcm_anti_sigm_ar = [ 1.0 - x for x in target_srcm_sigm_ar] - - target_dstm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_dstm_ar] - target_dstm_sigm_ar = target_dstm_blurred_ar#[ x / 2.0 + 0.5 for x in target_dstm_blurred_ar] - target_dstm_anti_sigm_ar = [ 1.0 - x for x in target_dstm_sigm_ar] - - target_src_sigm_ar = target_src_ar#[ x + 1 for x in target_src_ar] - target_dst_sigm_ar = target_dst_ar#[ x + 1 for x in target_dst_ar] - - pred_src_src_sigm_ar = pred_src_src#[ x + 1 for x in pred_src_src] - 
pred_dst_dst_sigm_ar = pred_dst_dst#[ x + 1 for x in pred_dst_dst] - pred_src_dst_sigm_ar = pred_src_dst#[ x + 1 for x in pred_src_dst] - - target_src_masked_ar = [ target_src_sigm_ar[i]*target_srcm_sigm_ar[i] for i in range(len(target_src_sigm_ar))] - target_dst_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] - target_dst_anti_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] - - pred_src_src_masked_ar = [ pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] for i in range(len(pred_src_src_sigm_ar))] - pred_dst_dst_masked_ar = [ pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] for i in range(len(pred_dst_dst_sigm_ar))] - - target_src_masked_ar_opt = target_src_masked_ar if masked_training else target_src_sigm_ar - target_dst_masked_ar_opt = target_dst_masked_ar if masked_training else target_dst_sigm_ar - - pred_src_src_masked_ar_opt = pred_src_src_masked_ar if masked_training else pred_src_src_sigm_ar - pred_dst_dst_masked_ar_opt = pred_dst_dst_masked_ar if masked_training else pred_dst_dst_sigm_ar - - psd_target_dst_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] - psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] - - if self.is_training_mode: - self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - - if 'liae' in self.options['archi']: - src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights - if self.options['learn_mask']: - src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights - else: - src_dst_loss_train_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights - if self.options['learn_mask']: - src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights - - if not self.options['pixel_loss']: - src_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_ar_opt[i], pred_src_src_masked_ar_opt[i]) for i in range(len(target_src_masked_ar_opt)) ]) - else: - src_loss_batch = sum([ K.mean ( 50*K.square( target_src_masked_ar_opt[i] - pred_src_src_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar_opt)) ]) - - src_loss = K.mean(src_loss_batch) - - face_style_power = self.options['face_style_power'] / 100.0 - - if face_style_power != 0: - src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked_ar[-1], target_dst_masked_ar[-1] ) - - bg_style_power = self.options['bg_style_power'] / 100.0 - if bg_style_power != 0: - if not self.options['pixel_loss']: - bg_loss = K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )) - else: - bg_loss = K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - 
target_dst_anti_masked_ar[-1] )) - src_loss += bg_loss - - if not self.options['pixel_loss']: - dst_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_ar_opt[i], pred_dst_dst_masked_ar_opt[i]) for i in range(len(target_dst_masked_ar_opt)) ]) - else: - dst_loss_batch = sum([ K.mean ( 50*K.square( target_dst_masked_ar_opt[i] - pred_dst_dst_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar_opt)) ]) - - dst_loss = K.mean(dst_loss_batch) - - feed = [warped_src, warped_dst] - feed += target_src_ar[::-1] - feed += target_srcm_ar[::-1] - feed += target_dst_ar[::-1] - feed += target_dstm_ar[::-1] - - self.src_dst_train = K.function (feed,[src_loss,dst_loss], self.src_dst_opt.get_updates(src_loss+dst_loss, src_dst_loss_train_weights) ) - - if self.options['learn_mask']: - src_mask_loss = sum([ K.mean(K.square(target_srcm_ar[-1]-pred_src_srcm[-1])) for i in range(len(target_srcm_ar)) ]) - dst_mask_loss = sum([ K.mean(K.square(target_dstm_ar[-1]-pred_dst_dstm[-1])) for i in range(len(target_dstm_ar)) ]) - - feed = [ warped_src, warped_dst] - feed += target_srcm_ar[::-1] - feed += target_dstm_ar[::-1] - - self.src_dst_mask_train = K.function (feed,[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, src_dst_mask_loss_train_weights) ) - - if self.options['learn_mask']: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_dst_dstm[-1], pred_src_dst[-1], pred_src_dstm[-1]]) - else: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_src_dst[-1] ] ) - - - else: - if self.options['learn_mask']: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1], pred_dst_dstm[-1], pred_src_dstm[-1] ]) - else: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1] ]) - - - if self.is_training_mode: - self.src_sample_losses = [] - self.dst_sample_losses = [] - - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE - - training_data_src_path = self.training_data_src_path - training_data_dst_path = self.training_data_dst_path - sort_by_yaw = self.sort_by_yaw - - if self.pretrain and self.pretraining_data_path is not None: - training_data_src_path = self.pretraining_data_path - training_data_dst_path = self.pretraining_data_path - sort_by_yaw = False - - self.set_training_data_generators ([ - SampleGeneratorFace(training_data_src_path, sort_by_yaw_target_samples_path=training_data_dst_path if sort_by_yaw else None, - random_ct_samples_path=training_data_dst_path if apply_random_ct else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution, 'apply_ct': apply_random_ct} ] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i), 'apply_ct': apply_random_ct } for i in range(ms_count)] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)] - ), - - SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, 
), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution} ] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i)} for i in range(ms_count)] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)]) - ]) - - #override - def get_model_filename_list(self): - ar = [] - if 'liae' in self.options['archi']: - ar += [[self.encoder, 'encoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.decoder, 'decoder.h5'] - ] - - if not self.pretrain or self.iter == 0: - ar += [ [self.inter_AB, 'inter_AB.h5'], - ] - - if self.options['learn_mask']: - ar += [ [self.decoderm, 'decoderm.h5'] ] - - elif 'df' in self.options['archi']: - if not self.pretrain or self.iter == 0: - ar += [ [self.encoder, 'encoder.h5'], - ] - - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - - if self.options['learn_mask']: - ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'] ] - return ar - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - src_samples = generators_samples[0] - dst_samples = generators_samples[1] - - feed = [src_samples[0], dst_samples[0] ] + \ - src_samples[1:1+self.ms_count*2] + \ - dst_samples[1:1+self.ms_count*2] - - src_loss, dst_loss, = self.src_dst_train (feed) - - if self.options['learn_mask']: - feed = [ src_samples[0], dst_samples[0] ] + \ - src_samples[1+self.ms_count:1+self.ms_count*2] + \ - dst_samples[1+self.ms_count:1+self.ms_count*2] - src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) - - return ( ('src_loss', src_loss), ('dst_loss', dst_loss) ) - - - #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][1+self.ms_count][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][1+self.ms_count][0:4] - - if self.options['learn_mask']: - S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] - else: - S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - - result = [] - st = [] - for i in range(0, len(test_S)): - ar = S[i], SS[i], D[i], DD[i], SD[i] - st.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE', np.concatenate (st, axis=0 )), ] - - if self.options['learn_mask']: - st_m = [] - for i in range(0, len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) - st_m.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE masked', np.concatenate (st_m, axis=0 )), ] - - return result - - def predictor_func (self, face): - if self.options['learn_mask']: - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] - else: - bgr, = self.AE_convert ([face[np.newaxis,...]]) - return bgr[0] - - #override - def get_converter(self): - base_erode_mask_modifier = 30 if self.options['face_type'] == 'f' else 100 - base_blur_mask_modifier = 0 if self.options['face_type'] == 'f' else 100 - - default_erode_mask_modifier = 0 - default_blur_mask_modifier = 100 if (self.options['face_style_power'] or self.options['bg_style_power']) and \ - 
self.options['face_type'] == 'f' else 0 - - face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=self.options['resolution'], - predictor_masked=self.options['learn_mask'], - face_type=face_type, - default_mode = 1 if self.options['apply_random_ct'] or self.options['face_style_power'] or self.options['bg_style_power'] else 4, - base_erode_mask_modifier=base_erode_mask_modifier, - base_blur_mask_modifier=base_blur_mask_modifier, - default_erode_mask_modifier=default_erode_mask_modifier, - default_blur_mask_modifier=default_blur_mask_modifier, - clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0) - - @staticmethod - def initialize_nn_functions(): - exec (nnlib.import_all(), locals(), globals()) - - def NormPass(x): - return x - - def Norm(norm=''): - if norm == 'bn': - return BatchNormalization(axis=-1) - else: - return NormPass - - def Act(act='', lrelu_alpha=0.1): - if act == 'prelu': - return PReLU() - else: - return LeakyReLU(alpha=lrelu_alpha) - - class ResidualBlock(object): - def __init__(self, filters, kernel_size=3, padding='zero', norm='', act='', **kwargs): - self.filters = filters - self.kernel_size = kernel_size - self.padding = padding - self.norm = norm - self.act = act - - def __call__(self, inp): - x = inp - x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) - x = Act(self.act, lrelu_alpha=0.2)(x) - x = Norm(self.norm)(x) - x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) - x = Add()([x, inp]) - x = Act(self.act, lrelu_alpha=0.2)(x) - x = Norm(self.norm)(x) - return x - SAEModel.ResidualBlock = ResidualBlock - - def downscale (dim, padding='zero', norm='', act='', **kwargs): - def func(x): - return Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=2, padding=padding)(x)) ) - return func - SAEModel.downscale = downscale - - def upscale (dim, padding='zero', norm='', act='', **kwargs): - def func(x): - return SubpixelUpscaler()(Norm(norm)(Act(act)(Conv2D(dim * 4, kernel_size=3, strides=1, padding=padding)(x)))) - return func - SAEModel.upscale = upscale - - def to_bgr (output_nc, padding='zero', **kwargs): - def func(x): - return Conv2D(output_nc, kernel_size=5, padding=padding, activation='sigmoid')(x) - return func - SAEModel.to_bgr = to_bgr - - @staticmethod - def LIAEEncFlow(resolution, ch_dims, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - downscale = partial(SAEModel.downscale, **kwargs) - - def func(input): - dims = K.int_shape(input)[-1]*ch_dims - - x = input - x = downscale(dims)(x) - x = downscale(dims*2)(x) - x = downscale(dims*4)(x) - x = downscale(dims*8)(x) - - x = Flatten()(x) - return x - return func - - @staticmethod - def LIAEInterFlow(resolution, ae_dims=256, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - lowest_dense_res=resolution // 16 - - def func(input): - x = input[0] - x = Dense(ae_dims)(x) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) - x = upscale(ae_dims*2)(x) - return x - return func - - @staticmethod - def LIAEDecFlow(output_nc,ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - to_bgr = 
partial(SAEModel.to_bgr, **kwargs) - dims = output_nc * ch_dims - ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) - - def func(input): - x = input[0] - - outputs = [] - x1 = upscale(dims*8)( x ) - - if add_residual_blocks: - x1 = ResidualBlock(dims*8)(x1) - x1 = ResidualBlock(dims*8)(x1) - - if multiscale_count >= 3: - outputs += [ to_bgr(output_nc) ( x1 ) ] - - x2 = upscale(dims*4)( x1 ) - - if add_residual_blocks: - x2 = ResidualBlock(dims*4)(x2) - x2 = ResidualBlock(dims*4)(x2) - - if multiscale_count >= 2: - outputs += [ to_bgr(output_nc) ( x2 ) ] - - x3 = upscale(dims*2)( x2 ) - - if add_residual_blocks: - x3 = ResidualBlock( dims*2)(x3) - x3 = ResidualBlock( dims*2)(x3) - - outputs += [ to_bgr(output_nc) ( x3 ) ] - - return outputs - return func - - @staticmethod - def DFEncFlow(resolution, ae_dims, ch_dims, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - downscale = partial(SAEModel.downscale, **kwargs)#, kernel_regularizer=keras.regularizers.l2(0.0), - lowest_dense_res = resolution // 16 - - def func(input): - x = input - - dims = K.int_shape(input)[-1]*ch_dims - x = downscale(dims)(x) - x = downscale(dims*2)(x) - x = downscale(dims*4)(x) - x = downscale(dims*8)(x) - - x = Dense(ae_dims)(Flatten()(x)) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) - x = upscale(ae_dims)(x) - return x - return func - - @staticmethod - def DFDecFlow(output_nc, ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - to_bgr = partial(SAEModel.to_bgr, **kwargs) - dims = output_nc * ch_dims - ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) - - def func(input): - x = input[0] - - outputs = [] - x1 = upscale(dims*8)( x ) - - if add_residual_blocks: - x1 = ResidualBlock( dims*8 )(x1) - x1 = ResidualBlock( dims*8 )(x1) - - if multiscale_count >= 3: - outputs += [ to_bgr(output_nc) ( x1 ) ] - - x2 = upscale(dims*4)( x1 ) - - if add_residual_blocks: - x2 = ResidualBlock( dims*4)(x2) - x2 = ResidualBlock( dims*4)(x2) - - if multiscale_count >= 2: - outputs += [ to_bgr(output_nc) ( x2 ) ] - - x3 = upscale(dims*2)( x2 ) - - if add_residual_blocks: - x3 = ResidualBlock( dims*2)(x3) - x3 = ResidualBlock( dims*2)(x3) - - outputs += [ to_bgr(output_nc) ( x3 ) ] - - return outputs - return func - - +from functools import partial +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +#SAE - Styled AutoEncoder +class SAEModel(ModelBase): + + encoderH5 = 'encoder.h5' + inter_BH5 = 'inter_B.h5' + inter_ABH5 = 'inter_AB.h5' + decoderH5 = 'decoder.h5' + decodermH5 = 'decoderm.h5' + + decoder_srcH5 = 'decoder_src.h5' + decoder_srcmH5 = 'decoder_srcm.h5' + decoder_dstH5 = 'decoder_dst.h5' + decoder_dstmH5 = 'decoder_dstm.h5' + + #override + def onInitializeOptions(self, is_first_run, ask_override): + yn_str = {True:'y',False:'n'} + + default_resolution = 128 + default_archi = 'df' + default_face_type = 'f' + + if is_first_run: + resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. 
The value will be adjusted to a multiple of 16.") + resolution = np.clip (resolution, 64, 256) + while np.modf(resolution / 16)[0] != 0.0: + resolution -= 1 + self.options['resolution'] = resolution + + self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less of the cheek area.").lower() + self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning the mask can help the model to recognize face directions. Learning without the mask can reduce model size; in that case the converter is forced to use a 'not predicted mask' that is not as smooth as a predicted one. A model with style values can be trained without a mask and produce the same quality result.") + else: + self.options['resolution'] = self.options.get('resolution', default_resolution) + self.options['face_type'] = self.options.get('face_type', default_face_type) + self.options['learn_mask'] = self.options.get('learn_mask', True) + + + if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: + def_optimizer_mode = self.options.get('optimizer_mode', 1) + self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train a 2x bigger network by consuming system RAM. 3 - allows you to train a 3x bigger network by consuming a huge amount of RAM; slower, depending on CPU power.") + else: + self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) + + if is_first_run: + self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has a decreased chance to collapse. + else: + self.options['archi'] = self.options.get('archi', default_archi) + + default_ae_dims = 256 if 'liae' in self.options['archi'] else 512 + default_e_ch_dims = 42 + default_d_ch_dims = default_e_ch_dims // 2 + def_ca_weights = False + + if is_first_run: + self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will be packed into the AE dims. If the number of AE dims is not enough, then, for example, closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune the model size to fit your GPU." ), 32, 1024 ) + self.options['e_ch_dims'] = np.clip ( io.input_int("Encoder dims per channel (21-85 ?:help skip:%d) : " % (default_e_ch_dims) , default_e_ch_dims, help_message="More encoder dims help to recognize more facial features, but require more VRAM. You can fine-tune the model size to fit your GPU." ), 21, 85 ) + default_d_ch_dims = self.options['e_ch_dims'] // 2 + self.options['d_ch_dims'] = np.clip ( io.input_int("Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_d_ch_dims) , default_d_ch_dims, help_message="More decoder dims help to get better details, but require more VRAM. You can fine-tune the model size to fit your GPU." ), 10, 85 ) + self.options['multiscale_decoder'] = io.input_bool ("Use multiscale decoder? (y/n, ?:help skip:n) : ", False, help_message="A multiscale decoder helps to get better details.")
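For reference, the resolution prompt above only accepts values the networks can downscale cleanly: the loop steps the entered value down to the nearest multiple of 16. A minimal standalone sketch of that rule (plain Python, not part of the patch; the function name is illustrative):

    import numpy as np

    def adjust_resolution(resolution):
        # mirror the option handling above: clip to [64, 256],
        # then step down until the value divides evenly by 16
        resolution = np.clip(resolution, 64, 256)
        while np.modf(resolution / 16)[0] != 0.0:
            resolution -= 1
        return resolution

    assert adjust_resolution(100) == 96   # in range, rounded down to 6*16
    assert adjust_resolution(300) == 256  # clipped to the maximum first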
+ self.options['ca_weights'] = io.input_bool ("Use CA weights? (y/n, ?:help skip: %s ) : " % (yn_str[def_ca_weights]), def_ca_weights, help_message="Initialize the network with 'Convolution Aware' weights. This may help to achieve a higher-accuracy model, but it consumes extra time on the first run.") + else: + self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) + self.options['e_ch_dims'] = self.options.get('e_ch_dims', default_e_ch_dims) + self.options['d_ch_dims'] = self.options.get('d_ch_dims', default_d_ch_dims) + self.options['multiscale_decoder'] = self.options.get('multiscale_decoder', False) + self.options['ca_weights'] = self.options.get('ca_weights', def_ca_weights) + + default_face_style_power = 0.0 + default_bg_style_power = 0.0 + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: %s ) : " % (yn_str[def_pixel_loss]), def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time. Enabling this option too early increases the chance of model collapse.") + + default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power) + self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, + help_message="Learn to transfer face style details such as light and color conditions. Warning: enable it only after 10k iters, when the predicted face is clear enough to start learning style. Start from a value of 0.1 and check the history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + + default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power) + self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, + help_message="Learn to transfer the image around the face. This can make the face look more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + + default_apply_random_ct = False if is_first_run else self.options.get('apply_random_ct', False) + self.options['apply_random_ct'] = io.input_bool ("Apply random color transfer to src faceset? (y/n, ?:help skip:%s) : " % (yn_str[default_apply_random_ct]), default_apply_random_ct, help_message="Increase the variety of src samples by applying LCT color transfer from random dst samples. It is like 'face_style' learning, but with a more precise color transfer and without the risk of model collapse; it also does not require additional GPU resources, but training time may be longer because the src faceset becomes more diverse.")
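The style powers prompted above are entered as percentages in [0.0, 100.0]; onInitialize (further down in this patch) divides them by 100.0 before using them as loss weights. A small numeric illustration with an assumed user input (plain Python, not part of the patch):

    # assume the user enters 10.0 at the face style power prompt
    face_style_power_opt = 10.0

    # onInitialize rescales the percentage to a loss weight
    face_style_power = face_style_power_opt / 100.0      # -> 0.1

    # at resolution 128 the style loss blurs with radius resolution // 16
    gaussian_blur_radius = 128 // 16                     # -> 8
    assert (face_style_power, gaussian_blur_radius) == (0.1, 8)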
+ + if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 + default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) + self.options['clipgrad'] = io.input_bool ("Enable gradient clipping? (y/n, ?:help skip:%s) : " % (yn_str[default_clipgrad]), default_clipgrad, help_message="Gradient clipping reduces the chance of model collapse, at the cost of training speed.") + else: + self.options['clipgrad'] = False + + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) + self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) + self.options['apply_random_ct'] = self.options.get('apply_random_ct', False) + self.options['clipgrad'] = self.options.get('clipgrad', False) + + if is_first_run: + self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with a large amount of various faces. This technique may help to train the fake with overly different face shapes and lighting conditions between the src/dst data. The face will look more morphed. To reduce the morph effect, some model files will be initialized but not updated after pretraining: LIAE: inter_AB.h5, DF: encoder.h5. The longer you pretrain the model, the more morphed the face will look. After that, save and run the training again.") + else: + self.options['pretrain'] = False + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + SAEModel.initialize_nn_functions() + self.set_vram_batch_requirements({1.5:4}) + + resolution = self.options['resolution'] + ae_dims = self.options['ae_dims'] + e_ch_dims = self.options['e_ch_dims'] + d_ch_dims = self.options['d_ch_dims'] + self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) + if not self.pretrain: + self.options.pop('pretrain') + + d_residual_blocks = True + bgr_shape = (resolution, resolution, 3) + mask_shape = (resolution, resolution, 1) + + self.ms_count = ms_count = 3 if (self.options['multiscale_decoder']) else 1 + + apply_random_ct = self.options.get('apply_random_ct', False) + masked_training = True + + warped_src = Input(bgr_shape) + target_src = Input(bgr_shape) + target_srcm = Input(mask_shape) + + warped_dst = Input(bgr_shape) + target_dst = Input(bgr_shape) + target_dstm = Input(mask_shape) + + target_src_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)] + target_srcm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)] + target_dst_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)] + target_dstm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)] + + common_flow_kwargs = { 'padding': 'zero', + 'norm': '', + 'act':'' } + models_list = [] + weights_to_load = [] + if 'liae' in self.options['archi']: + self.encoder = modelify(SAEModel.LIAEEncFlow(resolution, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape)) + + enc_output_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] + + self.inter_B = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs) + self.inter_AB = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs) + + inter_output_Inputs = [ Input( np.array(K.int_shape(x)[1:])*(1,1,2) ) for x in self.inter_B.outputs ] + + self.decoder = modelify(SAEModel.LIAEDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks,
**common_flow_kwargs)) (inter_output_Inputs) + models_list += [self.encoder, self.inter_B, self.inter_AB, self.decoder] + + if self.options['learn_mask']: + self.decoderm = modelify(SAEModel.LIAEDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs)) (inter_output_Inputs) + models_list += [self.decoderm] + + if not self.is_first_run(): + weights_to_load += [ [self.encoder , 'encoder.h5'], + [self.inter_B , 'inter_B.h5'], + [self.inter_AB, 'inter_AB.h5'], + [self.decoder , 'decoder.h5'], + ] + if self.options['learn_mask']: + weights_to_load += [ [self.decoderm, 'decoderm.h5'] ] + + warped_src_code = self.encoder (warped_src) + warped_src_inter_AB_code = self.inter_AB (warped_src_code) + warped_src_inter_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) + + warped_dst_code = self.encoder (warped_dst) + warped_dst_inter_B_code = self.inter_B (warped_dst_code) + warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) + warped_dst_inter_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) + + warped_src_dst_inter_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) + + pred_src_src = self.decoder(warped_src_inter_code) + pred_dst_dst = self.decoder(warped_dst_inter_code) + pred_src_dst = self.decoder(warped_src_dst_inter_code) + + if self.options['learn_mask']: + pred_src_srcm = self.decoderm(warped_src_inter_code) + pred_dst_dstm = self.decoderm(warped_dst_inter_code) + pred_src_dstm = self.decoderm(warped_src_dst_inter_code) + + elif 'df' in self.options['archi']: + self.encoder = modelify(SAEModel.DFEncFlow(resolution, ae_dims=ae_dims, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape)) + + dec_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] + + self.decoder_src = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs) + self.decoder_dst = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs) + models_list += [self.encoder, self.decoder_src, self.decoder_dst] + + if self.options['learn_mask']: + self.decoder_srcm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs) + self.decoder_dstm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs) + models_list += [self.decoder_srcm, self.decoder_dstm] + + if not self.is_first_run(): + weights_to_load += [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + if self.options['learn_mask']: + weights_to_load += [ [self.decoder_srcm, 'decoder_srcm.h5'], + [self.decoder_dstm, 'decoder_dstm.h5'], + ] + + warped_src_code = self.encoder (warped_src) + warped_dst_code = self.encoder (warped_dst) + pred_src_src = self.decoder_src(warped_src_code) + pred_dst_dst = self.decoder_dst(warped_dst_code) + pred_src_dst = self.decoder_src(warped_dst_code) + + if self.options['learn_mask']: + pred_src_srcm = self.decoder_srcm(warped_src_code) + pred_dst_dstm = self.decoder_dstm(warped_dst_code) + pred_src_dstm = self.decoder_srcm(warped_dst_code) + + if self.is_first_run(): + if self.options.get('ca_weights',False): + conv_weights_list = [] + for model in models_list: + for layer in model.layers: + if type(layer) == keras.layers.Conv2D: + conv_weights_list += [layer.weights[0]] 
#Conv2D kernel_weights + CAInitializerMP ( conv_weights_list ) + else: + self.load_weights_safe(weights_to_load) + + pred_src_src, pred_dst_dst, pred_src_dst, = [ [x] if type(x) != list else x for x in [pred_src_src, pred_dst_dst, pred_src_dst, ] ] + + if self.options['learn_mask']: + pred_src_srcm, pred_dst_dstm, pred_src_dstm = [ [x] if type(x) != list else x for x in [pred_src_srcm, pred_dst_dstm, pred_src_dstm] ] + + target_srcm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_srcm_ar] + target_srcm_sigm_ar = target_srcm_blurred_ar #[ x / 2.0 + 0.5 for x in target_srcm_blurred_ar] + target_srcm_anti_sigm_ar = [ 1.0 - x for x in target_srcm_sigm_ar] + + target_dstm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_dstm_ar] + target_dstm_sigm_ar = target_dstm_blurred_ar#[ x / 2.0 + 0.5 for x in target_dstm_blurred_ar] + target_dstm_anti_sigm_ar = [ 1.0 - x for x in target_dstm_sigm_ar] + + target_src_sigm_ar = target_src_ar#[ x + 1 for x in target_src_ar] + target_dst_sigm_ar = target_dst_ar#[ x + 1 for x in target_dst_ar] + + pred_src_src_sigm_ar = pred_src_src#[ x + 1 for x in pred_src_src] + pred_dst_dst_sigm_ar = pred_dst_dst#[ x + 1 for x in pred_dst_dst] + pred_src_dst_sigm_ar = pred_src_dst#[ x + 1 for x in pred_src_dst] + + target_src_masked_ar = [ target_src_sigm_ar[i]*target_srcm_sigm_ar[i] for i in range(len(target_src_sigm_ar))] + target_dst_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] + target_dst_anti_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] + + pred_src_src_masked_ar = [ pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] for i in range(len(pred_src_src_sigm_ar))] + pred_dst_dst_masked_ar = [ pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] for i in range(len(pred_dst_dst_sigm_ar))] + + target_src_masked_ar_opt = target_src_masked_ar if masked_training else target_src_sigm_ar + target_dst_masked_ar_opt = target_dst_masked_ar if masked_training else target_dst_sigm_ar + + pred_src_src_masked_ar_opt = pred_src_src_masked_ar if masked_training else pred_src_src_sigm_ar + pred_dst_dst_masked_ar_opt = pred_dst_dst_masked_ar if masked_training else pred_dst_dst_sigm_ar + + psd_target_dst_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] + psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] + + if self.is_training_mode: + self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) + self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) + + if 'liae' in self.options['archi']: + src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights + if self.options['learn_mask']: + src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights + else: + src_dst_loss_train_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights + if self.options['learn_mask']: + src_dst_mask_loss_train_weights = self.encoder.trainable_weights + 
self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights + + if not self.options['pixel_loss']: + src_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_ar_opt[i], pred_src_src_masked_ar_opt[i]) for i in range(len(target_src_masked_ar_opt)) ]) + else: + src_loss_batch = sum([ K.mean ( 50*K.square( target_src_masked_ar_opt[i] - pred_src_src_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar_opt)) ]) + + src_loss = K.mean(src_loss_batch) + + face_style_power = self.options['face_style_power'] / 100.0 + + if face_style_power != 0: + src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked_ar[-1], target_dst_masked_ar[-1] ) + + bg_style_power = self.options['bg_style_power'] / 100.0 + if bg_style_power != 0: + if not self.options['pixel_loss']: + bg_loss = K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )) + else: + bg_loss = K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - target_dst_anti_masked_ar[-1] )) + src_loss += bg_loss + + if not self.options['pixel_loss']: + dst_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_ar_opt[i], pred_dst_dst_masked_ar_opt[i]) for i in range(len(target_dst_masked_ar_opt)) ]) + else: + dst_loss_batch = sum([ K.mean ( 50*K.square( target_dst_masked_ar_opt[i] - pred_dst_dst_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar_opt)) ]) + + dst_loss = K.mean(dst_loss_batch) + + feed = [warped_src, warped_dst] + feed += target_src_ar[::-1] + feed += target_srcm_ar[::-1] + feed += target_dst_ar[::-1] + feed += target_dstm_ar[::-1] + + self.src_dst_train = K.function (feed,[src_loss,dst_loss], self.src_dst_opt.get_updates(src_loss+dst_loss, src_dst_loss_train_weights) ) + + if self.options['learn_mask']: + src_mask_loss = sum([ K.mean(K.square(target_srcm_ar[-1]-pred_src_srcm[-1])) for i in range(len(target_srcm_ar)) ]) + dst_mask_loss = sum([ K.mean(K.square(target_dstm_ar[-1]-pred_dst_dstm[-1])) for i in range(len(target_dstm_ar)) ]) + + feed = [ warped_src, warped_dst] + feed += target_srcm_ar[::-1] + feed += target_dstm_ar[::-1] + + self.src_dst_mask_train = K.function (feed,[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, src_dst_mask_loss_train_weights) ) + + if self.options['learn_mask']: + self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_dst_dstm[-1], pred_src_dst[-1], pred_src_dstm[-1]]) + else: + self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_src_dst[-1] ] ) + + + else: + if self.options['learn_mask']: + self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1], pred_dst_dstm[-1], pred_src_dstm[-1] ]) + else: + self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1] ]) + + + if self.is_training_mode: + self.src_sample_losses = [] + self.dst_sample_losses = [] + + t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF + + t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE + + training_data_src_path = self.training_data_src_path + training_data_dst_path = self.training_data_dst_path + sort_by_yaw = self.sort_by_yaw + + if self.pretrain and self.pretraining_data_path is not None: + 
training_data_src_path = self.pretraining_data_path + training_data_dst_path = self.pretraining_data_path + sort_by_yaw = False + + self.set_training_data_generators ([ + SampleGeneratorFace(training_data_src_path, sort_by_yaw_target_samples_path=training_data_dst_path if sort_by_yaw else None, + random_ct_samples_path=training_data_dst_path if apply_random_ct else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution, 'apply_ct': apply_random_ct} ] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i), 'apply_ct': apply_random_ct } for i in range(ms_count)] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)] + ), + + SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, ), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution} ] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i)} for i in range(ms_count)] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)]) + ]) + + #override + def get_model_filename_list(self): + ar = [] + if 'liae' in self.options['archi']: + ar += [[self.encoder, 'encoder.h5'], + [self.inter_B, 'inter_B.h5'], + [self.decoder, 'decoder.h5'] + ] + + if not self.pretrain or self.iter == 0: + ar += [ [self.inter_AB, 'inter_AB.h5'], + ] + + if self.options['learn_mask']: + ar += [ [self.decoderm, 'decoderm.h5'] ] + + elif 'df' in self.options['archi']: + if not self.pretrain or self.iter == 0: + ar += [ [self.encoder, 'encoder.h5'], + ] + + ar += [ [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + + if self.options['learn_mask']: + ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], + [self.decoder_dstm, 'decoder_dstm.h5'] ] + return ar + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, generators_samples, generators_list): + src_samples = generators_samples[0] + dst_samples = generators_samples[1] + + feed = [src_samples[0], dst_samples[0] ] + \ + src_samples[1:1+self.ms_count*2] + \ + dst_samples[1:1+self.ms_count*2] + + src_loss, dst_loss, = self.src_dst_train (feed) + + if self.options['learn_mask']: + feed = [ src_samples[0], dst_samples[0] ] + \ + src_samples[1+self.ms_count:1+self.ms_count*2] + \ + dst_samples[1+self.ms_count:1+self.ms_count*2] + src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) + + return ( ('src_loss', src_loss), ('dst_loss', dst_loss) ) + + + #override + def onGetPreview(self, sample): + test_S = sample[0][1][0:4] #first 4 samples + test_S_m = sample[0][1+self.ms_count][0:4] #first 4 samples + test_D = sample[1][1][0:4] + test_D_m = sample[1][1+self.ms_count][0:4] + + if self.options['learn_mask']: + S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] + DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] + else: + S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + 
self.AE_view ([test_S, test_D]) ) ] + + result = [] + st = [] + for i in range(0, len(test_S)): + ar = S[i], SS[i], D[i], DD[i], SD[i] + st.append ( np.concatenate ( ar, axis=1) ) + + result += [ ('SAE', np.concatenate (st, axis=0 )), ] + + if self.options['learn_mask']: + st_m = [] + for i in range(0, len(test_S)): + ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) + st_m.append ( np.concatenate ( ar, axis=1) ) + + result += [ ('SAE masked', np.concatenate (st_m, axis=0 )), ] + + return result + + def predictor_func (self, face): + if self.options['learn_mask']: + bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) + mask = mask_dst_dstm[0] * mask_src_dstm[0] + return bgr[0], mask[...,0] + else: + bgr, = self.AE_convert ([face[np.newaxis,...]]) + return bgr[0] + + #override + def get_converter(self): + base_erode_mask_modifier = 30 if self.options['face_type'] == 'f' else 100 + base_blur_mask_modifier = 0 if self.options['face_type'] == 'f' else 100 + + default_erode_mask_modifier = 0 + default_blur_mask_modifier = 100 if (self.options['face_style_power'] or self.options['bg_style_power']) and \ + self.options['face_type'] == 'f' else 0 + + face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=self.options['resolution'], + predictor_masked=self.options['learn_mask'], + face_type=face_type, + default_mode = 1 if self.options['apply_random_ct'] or self.options['face_style_power'] or self.options['bg_style_power'] else 4, + base_erode_mask_modifier=base_erode_mask_modifier, + base_blur_mask_modifier=base_blur_mask_modifier, + default_erode_mask_modifier=default_erode_mask_modifier, + default_blur_mask_modifier=default_blur_mask_modifier, + clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0) + + @staticmethod + def initialize_nn_functions(): + exec (nnlib.import_all(), locals(), globals()) + + def NormPass(x): + return x + + def Norm(norm=''): + if norm == 'bn': + return BatchNormalization(axis=-1) + else: + return NormPass + + def Act(act='', lrelu_alpha=0.1): + if act == 'prelu': + return PReLU() + else: + return LeakyReLU(alpha=lrelu_alpha) + + class ResidualBlock(object): + def __init__(self, filters, kernel_size=3, padding='zero', norm='', act='', **kwargs): + self.filters = filters + self.kernel_size = kernel_size + self.padding = padding + self.norm = norm + self.act = act + + def __call__(self, inp): + x = inp + x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) + x = Act(self.act, lrelu_alpha=0.2)(x) + x = Norm(self.norm)(x) + x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) + x = Add()([x, inp]) + x = Act(self.act, lrelu_alpha=0.2)(x) + x = Norm(self.norm)(x) + return x + SAEModel.ResidualBlock = ResidualBlock + + def downscale (dim, padding='zero', norm='', act='', **kwargs): + def func(x): + return Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=2, padding=padding)(x)) ) + return func + SAEModel.downscale = downscale + + def upscale (dim, padding='zero', norm='', act='', **kwargs): + def func(x): + return SubpixelUpscaler()(Norm(norm)(Act(act)(Conv2D(dim * 4, kernel_size=3, strides=1, padding=padding)(x)))) + return func + SAEModel.upscale = upscale + + def to_bgr (output_nc, padding='zero', **kwargs): + def func(x): + return Conv2D(output_nc, kernel_size=5, padding=padding, 
activation='sigmoid')(x) + return func + SAEModel.to_bgr = to_bgr + + @staticmethod + def LIAEEncFlow(resolution, ch_dims, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + downscale = partial(SAEModel.downscale, **kwargs) + + def func(input): + dims = K.int_shape(input)[-1]*ch_dims + + x = input + x = downscale(dims)(x) + x = downscale(dims*2)(x) + x = downscale(dims*4)(x) + x = downscale(dims*8)(x) + + x = Flatten()(x) + return x + return func + + @staticmethod + def LIAEInterFlow(resolution, ae_dims=256, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + lowest_dense_res=resolution // 16 + + def func(input): + x = input[0] + x = Dense(ae_dims)(x) + x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) + x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) + x = upscale(ae_dims*2)(x) + return x + return func + + @staticmethod + def LIAEDecFlow(output_nc,ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + to_bgr = partial(SAEModel.to_bgr, **kwargs) + dims = output_nc * ch_dims + ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) + + def func(input): + x = input[0] + + outputs = [] + x1 = upscale(dims*8)( x ) + + if add_residual_blocks: + x1 = ResidualBlock(dims*8)(x1) + x1 = ResidualBlock(dims*8)(x1) + + if multiscale_count >= 3: + outputs += [ to_bgr(output_nc) ( x1 ) ] + + x2 = upscale(dims*4)( x1 ) + + if add_residual_blocks: + x2 = ResidualBlock(dims*4)(x2) + x2 = ResidualBlock(dims*4)(x2) + + if multiscale_count >= 2: + outputs += [ to_bgr(output_nc) ( x2 ) ] + + x3 = upscale(dims*2)( x2 ) + + if add_residual_blocks: + x3 = ResidualBlock( dims*2)(x3) + x3 = ResidualBlock( dims*2)(x3) + + outputs += [ to_bgr(output_nc) ( x3 ) ] + + return outputs + return func + + @staticmethod + def DFEncFlow(resolution, ae_dims, ch_dims, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + downscale = partial(SAEModel.downscale, **kwargs)#, kernel_regularizer=keras.regularizers.l2(0.0), + lowest_dense_res = resolution // 16 + + def func(input): + x = input + + dims = K.int_shape(input)[-1]*ch_dims + x = downscale(dims)(x) + x = downscale(dims*2)(x) + x = downscale(dims*4)(x) + x = downscale(dims*8)(x) + + x = Dense(ae_dims)(Flatten()(x)) + x = Dense(lowest_dense_res * lowest_dense_res * ae_dims)(x) + x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) + x = upscale(ae_dims)(x) + return x + return func + + @staticmethod + def DFDecFlow(output_nc, ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + to_bgr = partial(SAEModel.to_bgr, **kwargs) + dims = output_nc * ch_dims + ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) + + def func(input): + x = input[0] + + outputs = [] + x1 = upscale(dims*8)( x ) + + if add_residual_blocks: + x1 = ResidualBlock( dims*8 )(x1) + x1 = ResidualBlock( dims*8 )(x1) + + if multiscale_count >= 3: + outputs += [ to_bgr(output_nc) ( x1 ) ] + + x2 = upscale(dims*4)( x1 ) + + if add_residual_blocks: + x2 = ResidualBlock( dims*4)(x2) + x2 = ResidualBlock( dims*4)(x2) + + if multiscale_count >= 2: + outputs += [ to_bgr(output_nc) ( x2 ) ] + + x3 = upscale(dims*2)( x2 ) + + if add_residual_blocks: + x3 = ResidualBlock( dims*2)(x3) + x3 = 
ResidualBlock( dims*2)(x3) + + outputs += [ to_bgr(output_nc) ( x3 ) ] + + return outputs + return func + + Model = SAEModel \ No newline at end of file diff --git a/models/Model_SAE/__init__.py b/models/Model_SAE/__init__.py index 0188f11..704b01d 100644 --- a/models/Model_SAE/__init__.py +++ b/models/Model_SAE/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/__init__.py b/models/__init__.py index 971091d..f27088d 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,5 +1,5 @@ -from .ModelBase import ModelBase - -def import_model(name): - module = __import__('Model_'+name, globals(), locals(), [], 1) - return getattr(module, 'Model') +from .ModelBase import ModelBase + +def import_model(name): + module = __import__('Model_'+name, globals(), locals(), [], 1) + return getattr(module, 'Model') diff --git a/nnlib/CAInitializer.py b/nnlib/CAInitializer.py index f81dd06..4245334 100644 --- a/nnlib/CAInitializer.py +++ b/nnlib/CAInitializer.py @@ -1,112 +1,112 @@ -import numpy as np - -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - # Arguments - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - # Returns - A tuple of scalars, `(fan_in, fan_out)`. - # Raises - ValueError: in case of invalid `data_format` argument. - """ - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:-2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. 
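For reference, _compute_fans above maps a kernel shape to (fan_in, fan_out), which CAGenerateWeights below turns into the target variance 2 / fan_in. A minimal check for an assumed channels_last Conv2D kernel shape (plain Python, not part of the patch):

    import numpy as np

    # channels_last Conv2D kernel: (rows, cols, input_depth, output_depth)
    shape = (3, 3, 64, 128)                       # assumed example shape
    receptive_field_size = np.prod(shape[:-2])    # 3 * 3 = 9
    fan_in = shape[-2] * receptive_field_size     # 64 * 9 = 576
    fan_out = shape[-1] * receptive_field_size    # 128 * 9 = 1152
    assert (fan_in, fan_out) == (576, 1152)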
- fan_in = np.sqrt(np.prod(shape)) - fan_out = np.sqrt(np.prod(shape)) - return fan_in, fan_out - -def _create_basis(filters, size, floatx, eps_std): - if size == 1: - return np.random.normal(0.0, eps_std, (filters, size)) - - nbb = filters // size + 1 - li = [] - for i in range(nbb): - a = np.random.normal(0.0, 1.0, (size, size)) - a = _symmetrize(a) - u, _, v = np.linalg.svd(a) - li.extend(u.T.tolist()) - p = np.array(li[:filters], dtype=floatx) - return p - -def _symmetrize(a): - return a + a.T - np.diag(a.diagonal()) - -def _scale_filters(filters, variance): - c_var = np.var(filters) - p = np.sqrt(variance / c_var) - return filters * p - -def CAGenerateWeights ( shape, floatx, data_format, eps_std=0.05, seed=None ): - if seed is not None: - np.random.seed(seed) - - fan_in, fan_out = _compute_fans(shape, data_format) - variance = 2 / fan_in - - rank = len(shape) - if rank == 3: - row, stack_size, filters_size = shape - - transpose_dimensions = (2, 1, 0) - kernel_shape = (row,) - correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) - correct_fft = np.fft.rfft - - elif rank == 4: - row, column, stack_size, filters_size = shape - - transpose_dimensions = (2, 3, 1, 0) - kernel_shape = (row, column) - correct_ifft = np.fft.irfft2 - correct_fft = np.fft.rfft2 - - elif rank == 5: - x, y, z, stack_size, filters_size = shape - - transpose_dimensions = (3, 4, 0, 1, 2) - kernel_shape = (x, y, z) - correct_fft = np.fft.rfftn - correct_ifft = np.fft.irfftn - else: - raise ValueError('rank unsupported') - - kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape - - init = [] - for i in range(filters_size): - basis = _create_basis( - stack_size, np.prod(kernel_fourier_shape), floatx, eps_std) - basis = basis.reshape((stack_size,) + kernel_fourier_shape) - - filters = [correct_ifft(x, kernel_shape) + - np.random.normal(0, eps_std, kernel_shape) for - x in basis] - - init.append(filters) - - # Format of array is now: filters, stack, row, column - init = np.array(init) - init = _scale_filters(init, variance) - return init.transpose(transpose_dimensions) +import numpy as np + +def _compute_fans(shape, data_format='channels_last'): + """Computes the number of input and output units for a weight shape. + # Arguments + shape: Integer shape tuple. + data_format: Image data format to use for convolution kernels. + Note that all kernels in Keras are standardized on the + `channels_last` ordering (even when inputs are set + to `channels_first`). + # Returns + A tuple of scalars, `(fan_in, fan_out)`. + # Raises + ValueError: in case of invalid `data_format` argument. + """ + if len(shape) == 2: + fan_in = shape[0] + fan_out = shape[1] + elif len(shape) in {3, 4, 5}: + # Assuming convolution kernels (1D, 2D or 3D). + # TH kernel shape: (depth, input_depth, ...) + # TF kernel shape: (..., input_depth, depth) + if data_format == 'channels_first': + receptive_field_size = np.prod(shape[2:]) + fan_in = shape[1] * receptive_field_size + fan_out = shape[0] * receptive_field_size + elif data_format == 'channels_last': + receptive_field_size = np.prod(shape[:-2]) + fan_in = shape[-2] * receptive_field_size + fan_out = shape[-1] * receptive_field_size + else: + raise ValueError('Invalid data_format: ' + data_format) + else: + # No specific assumptions. 
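_create_basis above gets its rows from the SVD of a symmetrized random matrix, which is what makes the 'Convolution Aware' basis orthonormal and well conditioned. A quick standalone check of that property (assumed sizes, not part of the patch):

    import numpy as np

    np.random.seed(0)
    size = 4
    a = np.random.normal(0.0, 1.0, (size, size))
    a = a + a.T - np.diag(a.diagonal())   # same symmetrization as _symmetrize
    u, _, v = np.linalg.svd(a)

    # u is orthogonal, so the rows taken from u.T are orthonormal
    assert np.allclose(u.T @ u, np.eye(size), atol=1e-10)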
+ fan_in = np.sqrt(np.prod(shape)) + fan_out = np.sqrt(np.prod(shape)) + return fan_in, fan_out + +def _create_basis(filters, size, floatx, eps_std): + if size == 1: + return np.random.normal(0.0, eps_std, (filters, size)) + + nbb = filters // size + 1 + li = [] + for i in range(nbb): + a = np.random.normal(0.0, 1.0, (size, size)) + a = _symmetrize(a) + u, _, v = np.linalg.svd(a) + li.extend(u.T.tolist()) + p = np.array(li[:filters], dtype=floatx) + return p + +def _symmetrize(a): + return a + a.T - np.diag(a.diagonal()) + +def _scale_filters(filters, variance): + c_var = np.var(filters) + p = np.sqrt(variance / c_var) + return filters * p + +def CAGenerateWeights ( shape, floatx, data_format, eps_std=0.05, seed=None ): + if seed is not None: + np.random.seed(seed) + + fan_in, fan_out = _compute_fans(shape, data_format) + variance = 2 / fan_in + + rank = len(shape) + if rank == 3: + row, stack_size, filters_size = shape + + transpose_dimensions = (2, 1, 0) + kernel_shape = (row,) + correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) + correct_fft = np.fft.rfft + + elif rank == 4: + row, column, stack_size, filters_size = shape + + transpose_dimensions = (2, 3, 1, 0) + kernel_shape = (row, column) + correct_ifft = np.fft.irfft2 + correct_fft = np.fft.rfft2 + + elif rank == 5: + x, y, z, stack_size, filters_size = shape + + transpose_dimensions = (3, 4, 0, 1, 2) + kernel_shape = (x, y, z) + correct_fft = np.fft.rfftn + correct_ifft = np.fft.irfftn + else: + raise ValueError('rank unsupported') + + kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape + + init = [] + for i in range(filters_size): + basis = _create_basis( + stack_size, np.prod(kernel_fourier_shape), floatx, eps_std) + basis = basis.reshape((stack_size,) + kernel_fourier_shape) + + filters = [correct_ifft(x, kernel_shape) + + np.random.normal(0, eps_std, kernel_shape) for + x in basis] + + init.append(filters) + + # Format of array is now: filters, stack, row, column + init = np.array(init) + init = _scale_filters(init, variance) + return init.transpose(transpose_dimensions) diff --git a/nnlib/__init__.py b/nnlib/__init__.py index 14793f7..1579fe5 100644 --- a/nnlib/__init__.py +++ b/nnlib/__init__.py @@ -1 +1 @@ -from .nnlib import nnlib +from .nnlib import nnlib diff --git a/nnlib/device.py b/nnlib/device.py index 144de43..e1ad8d7 100644 --- a/nnlib/device.py +++ b/nnlib/device.py @@ -1,357 +1,357 @@ -import os -import json -import numpy as np -from .pynvml import * - -#you can set DFL_TF_MIN_REQ_CAP manually for your build -#the reason why we cannot check tensorflow.version is it requires import tensorflow -tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35)) - -class device: - backend = None - class Config(): - force_gpu_idx = -1 - multi_gpu = False - force_gpu_idxs = None - choose_worst_gpu = False - gpu_idxs = [] - gpu_names = [] - gpu_compute_caps = [] - gpu_vram_gb = [] - allow_growth = True - use_fp16 = False - cpu_only = False - backend = None - def __init__ (self, force_gpu_idx = -1, - multi_gpu = False, - force_gpu_idxs = None, - choose_worst_gpu = False, - allow_growth = True, - use_fp16 = False, - cpu_only = False, - **in_options): - - self.backend = device.backend - self.use_fp16 = use_fp16 - self.cpu_only = cpu_only - - if not self.cpu_only: - self.cpu_only = (self.backend == "tensorflow-cpu") - - if not self.cpu_only: - self.force_gpu_idx = force_gpu_idx - self.multi_gpu = multi_gpu - self.force_gpu_idxs = force_gpu_idxs - self.choose_worst_gpu = choose_worst_gpu - 
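CAGenerateWeights, re-added above, returns the initialization already transposed back into the requested kernel shape (for rank 4, the final transpose (2, 3, 1, 0) restores (rows, cols, input_depth, filters)). A small usage sketch (assuming the repo layout and its dependencies make nnlib.CAInitializer importable; the sizes are arbitrary, not part of the patch):

    import numpy as np
    from nnlib.CAInitializer import CAGenerateWeights

    # channels_last Conv2D kernel shape: (rows, cols, input_depth, filters)
    shape = (3, 3, 8, 16)
    w = CAGenerateWeights(shape, np.float32, 'channels_last', seed=13)
    assert w.shape == shape   # returned in the requested kernel shape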
self.allow_growth = allow_growth - - self.gpu_idxs = [] - - if force_gpu_idxs is not None: - for idx in force_gpu_idxs.split(','): - idx = int(idx) - if device.isValidDeviceIdx(idx): - self.gpu_idxs.append(idx) - else: - gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx() - if gpu_idx != -1: - if self.multi_gpu: - self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx ) - if len(self.gpu_idxs) <= 1: - self.multi_gpu = False - else: - self.gpu_idxs = [gpu_idx] - - self.cpu_only = (len(self.gpu_idxs) == 0) - - - if not self.cpu_only: - self.gpu_names = [] - self.gpu_compute_caps = [] - self.gpu_vram_gb = [] - for gpu_idx in self.gpu_idxs: - self.gpu_names += [device.getDeviceName(gpu_idx)] - self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ] - self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ] - self.cpu_only = (len(self.gpu_idxs) == 0) - else: - self.gpu_names = ['CPU'] - self.gpu_compute_caps = [99] - self.gpu_vram_gb = [0] - - if self.cpu_only: - self.backend = "tensorflow-cpu" - - @staticmethod - def getValidDeviceIdxsEnumerator(): - if device.backend == "plaidML": - for i in range(plaidML_devices_count): - yield i - elif device.backend == "tensorflow": - for gpu_idx in range(nvmlDeviceGetCount()): - cap = device.getDeviceComputeCapability (gpu_idx) - if cap >= tf_min_req_cap: - yield gpu_idx - elif device.backend == "tensorflow-generic": - yield 0 - - - @staticmethod - def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): - result = [] - if device.backend == "plaidML": - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024: - result.append (i) - elif device.backend == "tensorflow": - for i in device.getValidDeviceIdxsEnumerator(): - handle = nvmlDeviceGetHandleByIndex(i) - memInfo = nvmlDeviceGetMemoryInfo( handle ) - if (memInfo.total) >= totalmemsize_gb*1024*1024*1024: - result.append (i) - elif device.backend == "tensorflow-generic": - return [0] - - return result - - @staticmethod - def getAllDevicesIdxsList(): - if device.backend == "plaidML": - return [ *range(plaidML_devices_count) ] - elif device.backend == "tensorflow": - return [ *range(nvmlDeviceGetCount() ) ] - elif device.backend == "tensorflow-generic": - return [0] - - @staticmethod - def getValidDevicesIdxsWithNamesList(): - if device.backend == "plaidML": - return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ] - elif device.backend == "tensorflow": - return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in device.getValidDeviceIdxsEnumerator() ] - elif device.backend == "tensorflow-cpu": - return [ (0, 'CPU') ] - elif device.backend == "tensorflow-generic": - return [ (0, device.getDeviceName(0) ) ] - - @staticmethod - def getDeviceVRAMTotalGb (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024) - elif device.backend == "tensorflow": - if idx < nvmlDeviceGetCount(): - memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) ) - return round ( memInfo.total / (1024*1024*1024) ) - - return 0 - elif device.backend == "tensorflow-generic": - return 2 - - @staticmethod - def getBestValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = 0 - for i in device.getValidDeviceIdxsEnumerator(): - total = 
plaidML_devices[i]['globalMemSize'] - if total > idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = 0 - for i in device.getValidDeviceIdxsEnumerator(): - memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) - if memInfo.total > idx_mem: - idx = i - idx_mem = memInfo.total - - return idx - elif device.backend == "tensorflow-generic": - return 0 - - @staticmethod - def getWorstValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = sys.maxsize - for i in device.getValidDeviceIdxsEnumerator(): - total = plaidML_devices[i]['globalMemSize'] - if total < idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = sys.maxsize - for i in device.getValidDeviceIdxsEnumerator(): - memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) - if memInfo.total < idx_mem: - idx = i - idx_mem = memInfo.total - - return idx - elif device.backend == "tensorflow-generic": - return 0 - - @staticmethod - def isValidDeviceIdx(idx): - if device.backend == "plaidML": - return idx in [*device.getValidDeviceIdxsEnumerator()] - elif device.backend == "tensorflow": - return idx in [*device.getValidDeviceIdxsEnumerator()] - elif device.backend == "tensorflow-generic": - return (idx == 0) - - @staticmethod - def getDeviceIdxsEqualModel(idx): - if device.backend == "plaidML": - result = [] - idx_name = plaidML_devices[idx]['description'] - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['description'] == idx_name: - result.append (i) - - return result - elif device.backend == "tensorflow": - result = [] - idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() - for i in device.getValidDeviceIdxsEnumerator(): - if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name: - result.append (i) - - return result - elif device.backend == "tensorflow-generic": - return [0] if idx == 0 else [] - - @staticmethod - def getDeviceName (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['description'] - elif device.backend == "tensorflow": - if idx < nvmlDeviceGetCount(): - return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() - elif device.backend == "tensorflow-generic": - if idx == 0: - return "Generic GeForce GPU" - - return None - - @staticmethod - def getDeviceID (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['id'].decode() - - return None - - @staticmethod - def getDeviceComputeCapability(idx): - result = 0 - if device.backend == "plaidML": - return 99 - elif device.backend == "tensorflow": - if idx < nvmlDeviceGetCount(): - result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx)) - elif device.backend == "tensorflow-generic": - return 99 if idx == 0 else 0 - - return result[0] * 10 + result[1] - - -force_plaidML = os.environ.get("DFL_FORCE_PLAIDML", "0") == "1" #for OpenCL build , forcing using plaidML even if NVIDIA found -force_tf_cpu = os.environ.get("DFL_FORCE_TF_CPU", "0") == "1" #for OpenCL build , forcing using tf-cpu if plaidML failed -has_nvml = False -has_nvml_cap = False - -#use DFL_FORCE_HAS_NVIDIA_DEVICE=1 if -#- your NVIDIA cannot be seen by OpenCL -#- CUDA build of DFL -has_nvidia_device = os.environ.get("DFL_FORCE_HAS_NVIDIA_DEVICE", "0") == "1" - -plaidML_devices = None -def get_plaidML_devices(): - global plaidML_devices - global has_nvidia_device - if 
plaidML_devices is None: - plaidML_devices = [] - # Using plaidML OpenCL backend to determine system devices and has_nvidia_device - try: - os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #this enables work plaidML without run 'plaidml-setup' - import plaidml - ctx = plaidml.Context() - for d in plaidml.devices(ctx, return_all=True)[0]: - details = json.loads(d.details) - if details['type'] == 'CPU': #skipping opencl-CPU - continue - if 'nvidia' in details['vendor'].lower(): - has_nvidia_device = True - plaidML_devices += [ {'id':d.id, - 'globalMemSize' : int(details['globalMemSize']), - 'description' : d.description.decode() - }] - ctx.shutdown() - except: - pass - return plaidML_devices - -if not has_nvidia_device: - get_plaidML_devices() - -#choosing backend - -if device.backend is None and not force_tf_cpu: - #first trying to load NVSMI and detect CUDA devices for tensorflow backend, - #even force_plaidML is choosed, because if plaidML will fail, we can choose tensorflow - try: - nvmlInit() - has_nvml = True - device.backend = "tensorflow" #set tensorflow backend in order to use device.*device() functions - - gpu_idxs = device.getAllDevicesIdxsList() - gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] ) - - if len ( np.ndarray.flatten ( np.argwhere (gpu_caps >= tf_min_req_cap) ) ) == 0: - if not force_plaidML: - print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." % (tf_min_req_cap // 10, tf_min_req_cap % 10) ) - device.backend = None - nvmlShutdown() - else: - has_nvml_cap = True - except: - #if no NVSMI installed exception will occur - device.backend = None - has_nvml = False - -if force_plaidML or (device.backend is None and not has_nvidia_device): - #tensorflow backend was failed without has_nvidia_device , or forcing plaidML, trying to use plaidML backend - if len(get_plaidML_devices()) == 0: - #print ("plaidML: No capable OpenCL devices found. 
Falling back to tensorflow backend.") - device.backend = None - else: - device.backend = "plaidML" - plaidML_devices_count = len(get_plaidML_devices()) - -if device.backend is None: - if force_tf_cpu: - device.backend = "tensorflow-cpu" - elif not has_nvml: - if has_nvidia_device: - #some notebook systems have NVIDIA card without NVSMI in official drivers - #in that case considering we have system with one capable GPU and let tensorflow to choose best GPU - device.backend = "tensorflow-generic" - else: - #no NVSMI and no NVIDIA cards, also plaidML was failed, then CPU only - device.backend = "tensorflow-cpu" - else: - if has_nvml_cap: - #has NVSMI and capable CUDA-devices, but force_plaidML was failed, then we choosing tensorflow - device.backend = "tensorflow" - else: - #has NVSMI, no capable CUDA-devices, also plaidML was failed, then CPU only - device.backend = "tensorflow-cpu" +import os +import json +import numpy as np +from .pynvml import * + +#you can set DFL_TF_MIN_REQ_CAP manually for your build +#the reason why we cannot check tensorflow.version is it requires import tensorflow +tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35)) + +class device: + backend = None + class Config(): + force_gpu_idx = -1 + multi_gpu = False + force_gpu_idxs = None + choose_worst_gpu = False + gpu_idxs = [] + gpu_names = [] + gpu_compute_caps = [] + gpu_vram_gb = [] + allow_growth = True + use_fp16 = False + cpu_only = False + backend = None + def __init__ (self, force_gpu_idx = -1, + multi_gpu = False, + force_gpu_idxs = None, + choose_worst_gpu = False, + allow_growth = True, + use_fp16 = False, + cpu_only = False, + **in_options): + + self.backend = device.backend + self.use_fp16 = use_fp16 + self.cpu_only = cpu_only + + if not self.cpu_only: + self.cpu_only = (self.backend == "tensorflow-cpu") + + if not self.cpu_only: + self.force_gpu_idx = force_gpu_idx + self.multi_gpu = multi_gpu + self.force_gpu_idxs = force_gpu_idxs + self.choose_worst_gpu = choose_worst_gpu + self.allow_growth = allow_growth + + self.gpu_idxs = [] + + if force_gpu_idxs is not None: + for idx in force_gpu_idxs.split(','): + idx = int(idx) + if device.isValidDeviceIdx(idx): + self.gpu_idxs.append(idx) + else: + gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx() + if gpu_idx != -1: + if self.multi_gpu: + self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx ) + if len(self.gpu_idxs) <= 1: + self.multi_gpu = False + else: + self.gpu_idxs = [gpu_idx] + + self.cpu_only = (len(self.gpu_idxs) == 0) + + + if not self.cpu_only: + self.gpu_names = [] + self.gpu_compute_caps = [] + self.gpu_vram_gb = [] + for gpu_idx in self.gpu_idxs: + self.gpu_names += [device.getDeviceName(gpu_idx)] + self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ] + self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ] + self.cpu_only = (len(self.gpu_idxs) == 0) + else: + self.gpu_names = ['CPU'] + self.gpu_compute_caps = [99] + self.gpu_vram_gb = [0] + + if self.cpu_only: + self.backend = "tensorflow-cpu" + + @staticmethod + def getValidDeviceIdxsEnumerator(): + if device.backend == "plaidML": + for i in range(plaidML_devices_count): + yield i + elif device.backend == "tensorflow": + for gpu_idx in range(nvmlDeviceGetCount()): + cap = device.getDeviceComputeCapability (gpu_idx) + if cap >= tf_min_req_cap: + yield gpu_idx + elif device.backend == "tensorflow-generic": + yield 0 + + + 
@staticmethod + def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): + result = [] + if device.backend == "plaidML": + for i in device.getValidDeviceIdxsEnumerator(): + if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024: + result.append (i) + elif device.backend == "tensorflow": + for i in device.getValidDeviceIdxsEnumerator(): + handle = nvmlDeviceGetHandleByIndex(i) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + if (memInfo.total) >= totalmemsize_gb*1024*1024*1024: + result.append (i) + elif device.backend == "tensorflow-generic": + return [0] + + return result + + @staticmethod + def getAllDevicesIdxsList(): + if device.backend == "plaidML": + return [ *range(plaidML_devices_count) ] + elif device.backend == "tensorflow": + return [ *range(nvmlDeviceGetCount() ) ] + elif device.backend == "tensorflow-generic": + return [0] + + @staticmethod + def getValidDevicesIdxsWithNamesList(): + if device.backend == "plaidML": + return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ] + elif device.backend == "tensorflow": + return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in device.getValidDeviceIdxsEnumerator() ] + elif device.backend == "tensorflow-cpu": + return [ (0, 'CPU') ] + elif device.backend == "tensorflow-generic": + return [ (0, device.getDeviceName(0) ) ] + + @staticmethod + def getDeviceVRAMTotalGb (idx): + if device.backend == "plaidML": + if idx < plaidML_devices_count: + return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024) + elif device.backend == "tensorflow": + if idx < nvmlDeviceGetCount(): + memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) ) + return round ( memInfo.total / (1024*1024*1024) ) + + return 0 + elif device.backend == "tensorflow-generic": + return 2 + + @staticmethod + def getBestValidDeviceIdx(): + if device.backend == "plaidML": + idx = -1 + idx_mem = 0 + for i in device.getValidDeviceIdxsEnumerator(): + total = plaidML_devices[i]['globalMemSize'] + if total > idx_mem: + idx = i + idx_mem = total + + return idx + elif device.backend == "tensorflow": + idx = -1 + idx_mem = 0 + for i in device.getValidDeviceIdxsEnumerator(): + memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) + if memInfo.total > idx_mem: + idx = i + idx_mem = memInfo.total + + return idx + elif device.backend == "tensorflow-generic": + return 0 + + @staticmethod + def getWorstValidDeviceIdx(): + if device.backend == "plaidML": + idx = -1 + idx_mem = sys.maxsize + for i in device.getValidDeviceIdxsEnumerator(): + total = plaidML_devices[i]['globalMemSize'] + if total < idx_mem: + idx = i + idx_mem = total + + return idx + elif device.backend == "tensorflow": + idx = -1 + idx_mem = sys.maxsize + for i in device.getValidDeviceIdxsEnumerator(): + memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) + if memInfo.total < idx_mem: + idx = i + idx_mem = memInfo.total + + return idx + elif device.backend == "tensorflow-generic": + return 0 + + @staticmethod + def isValidDeviceIdx(idx): + if device.backend == "plaidML": + return idx in [*device.getValidDeviceIdxsEnumerator()] + elif device.backend == "tensorflow": + return idx in [*device.getValidDeviceIdxsEnumerator()] + elif device.backend == "tensorflow-generic": + return (idx == 0) + + @staticmethod + def getDeviceIdxsEqualModel(idx): + if device.backend == "plaidML": + result = [] + idx_name = plaidML_devices[idx]['description'] + for i in device.getValidDeviceIdxsEnumerator(): + if 
plaidML_devices[i]['description'] == idx_name: + result.append (i) + + return result + elif device.backend == "tensorflow": + result = [] + idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() + for i in device.getValidDeviceIdxsEnumerator(): + if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name: + result.append (i) + + return result + elif device.backend == "tensorflow-generic": + return [0] if idx == 0 else [] + + @staticmethod + def getDeviceName (idx): + if device.backend == "plaidML": + if idx < plaidML_devices_count: + return plaidML_devices[idx]['description'] + elif device.backend == "tensorflow": + if idx < nvmlDeviceGetCount(): + return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() + elif device.backend == "tensorflow-generic": + if idx == 0: + return "Generic GeForce GPU" + + return None + + @staticmethod + def getDeviceID (idx): + if device.backend == "plaidML": + if idx < plaidML_devices_count: + return plaidML_devices[idx]['id'].decode() + + return None + + @staticmethod + def getDeviceComputeCapability(idx): + result = 0 + if device.backend == "plaidML": + return 99 + elif device.backend == "tensorflow": + if idx < nvmlDeviceGetCount(): + result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx)) + elif device.backend == "tensorflow-generic": + return 99 if idx == 0 else 0 + + return result[0] * 10 + result[1] + + +force_plaidML = os.environ.get("DFL_FORCE_PLAIDML", "0") == "1" #for OpenCL build , forcing using plaidML even if NVIDIA found +force_tf_cpu = os.environ.get("DFL_FORCE_TF_CPU", "0") == "1" #for OpenCL build , forcing using tf-cpu if plaidML failed +has_nvml = False +has_nvml_cap = False + +#use DFL_FORCE_HAS_NVIDIA_DEVICE=1 if +#- your NVIDIA cannot be seen by OpenCL +#- CUDA build of DFL +has_nvidia_device = os.environ.get("DFL_FORCE_HAS_NVIDIA_DEVICE", "0") == "1" + +plaidML_devices = None +def get_plaidML_devices(): + global plaidML_devices + global has_nvidia_device + if plaidML_devices is None: + plaidML_devices = [] + # Using plaidML OpenCL backend to determine system devices and has_nvidia_device + try: + os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #this enables work plaidML without run 'plaidml-setup' + import plaidml + ctx = plaidml.Context() + for d in plaidml.devices(ctx, return_all=True)[0]: + details = json.loads(d.details) + if details['type'] == 'CPU': #skipping opencl-CPU + continue + if 'nvidia' in details['vendor'].lower(): + has_nvidia_device = True + plaidML_devices += [ {'id':d.id, + 'globalMemSize' : int(details['globalMemSize']), + 'description' : d.description.decode() + }] + ctx.shutdown() + except: + pass + return plaidML_devices + +if not has_nvidia_device: + get_plaidML_devices() + +#choosing backend + +if device.backend is None and not force_tf_cpu: + #first trying to load NVSMI and detect CUDA devices for tensorflow backend, + #even force_plaidML is choosed, because if plaidML will fail, we can choose tensorflow + try: + nvmlInit() + has_nvml = True + device.backend = "tensorflow" #set tensorflow backend in order to use device.*device() functions + + gpu_idxs = device.getAllDevicesIdxsList() + gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] ) + + if len ( np.ndarray.flatten ( np.argwhere (gpu_caps >= tf_min_req_cap) ) ) == 0: + if not force_plaidML: + print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." 
% (tf_min_req_cap // 10, tf_min_req_cap % 10) ) + device.backend = None + nvmlShutdown() + else: + has_nvml_cap = True + except: + #if no NVSMI installed exception will occur + device.backend = None + has_nvml = False + +if force_plaidML or (device.backend is None and not has_nvidia_device): + #tensorflow backend was failed without has_nvidia_device , or forcing plaidML, trying to use plaidML backend + if len(get_plaidML_devices()) == 0: + #print ("plaidML: No capable OpenCL devices found. Falling back to tensorflow backend.") + device.backend = None + else: + device.backend = "plaidML" + plaidML_devices_count = len(get_plaidML_devices()) + +if device.backend is None: + if force_tf_cpu: + device.backend = "tensorflow-cpu" + elif not has_nvml: + if has_nvidia_device: + #some notebook systems have NVIDIA card without NVSMI in official drivers + #in that case considering we have system with one capable GPU and let tensorflow to choose best GPU + device.backend = "tensorflow-generic" + else: + #no NVSMI and no NVIDIA cards, also plaidML was failed, then CPU only + device.backend = "tensorflow-cpu" + else: + if has_nvml_cap: + #has NVSMI and capable CUDA-devices, but force_plaidML was failed, then we choosing tensorflow + device.backend = "tensorflow" + else: + #has NVSMI, no capable CUDA-devices, also plaidML was failed, then CPU only + device.backend = "tensorflow-cpu" diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py index edcb201..62a39c9 100644 --- a/nnlib/nnlib.py +++ b/nnlib/nnlib.py @@ -1,1048 +1,1048 @@ -import os -import sys -import contextlib -import numpy as np - -from .CAInitializer import CAGenerateWeights -import multiprocessing -from joblib import Subprocessor - -from utils import std_utils -from .device import device -from interact import interact as io - -class nnlib(object): - device = device #forwards nnlib.devicelib to device in order to use nnlib as standalone lib - DeviceConfig = device.Config - active_DeviceConfig = DeviceConfig() #default is one best GPU - - backend = "" - - dlib = None - - keras = None - keras_contrib = None - - tf = None - tf_sess = None - - PML = None - PMLK = None - PMLTile= None - - code_import_keras = None - code_import_keras_contrib = None - code_import_all = None - - code_import_dlib = None - - - ResNet = None - UNet = None - UNetTemporalPredictor = None - NLayerDiscriminator = None - - code_import_keras_string = \ -""" -keras = nnlib.keras -K = keras.backend -KL = keras.layers - -Input = KL.Input - -Dense = KL.Dense -Conv2D = nnlib.Conv2D -Conv2DTranspose = nnlib.Conv2DTranspose -SeparableConv2D = KL.SeparableConv2D -MaxPooling2D = KL.MaxPooling2D -UpSampling2D = KL.UpSampling2D -BatchNormalization = KL.BatchNormalization - -LeakyReLU = KL.LeakyReLU -ReLU = KL.ReLU -PReLU = KL.PReLU -tanh = KL.Activation('tanh') -sigmoid = KL.Activation('sigmoid') -Dropout = KL.Dropout -Softmax = KL.Softmax - -Lambda = KL.Lambda -Add = KL.Add -Concatenate = KL.Concatenate - - -Flatten = KL.Flatten -Reshape = KL.Reshape - -ZeroPadding2D = KL.ZeroPadding2D - -RandomNormal = keras.initializers.RandomNormal -Model = keras.models.Model - -Adam = nnlib.Adam - -modelify = nnlib.modelify -gaussian_blur = nnlib.gaussian_blur -style_loss = nnlib.style_loss -dssim = nnlib.dssim - -PixelShuffler = nnlib.PixelShuffler -SubpixelUpscaler = nnlib.SubpixelUpscaler -Scale = nnlib.Scale - -CAInitializerMP = nnlib.CAInitializerMP - -#ReflectionPadding2D = nnlib.ReflectionPadding2D -#AddUniformNoise = nnlib.AddUniformNoise -""" - code_import_keras_contrib_string = \ -""" -keras_contrib 
= nnlib.keras_contrib -GroupNormalization = keras_contrib.layers.GroupNormalization -InstanceNormalization = keras_contrib.layers.InstanceNormalization -""" - code_import_dlib_string = \ -""" -dlib = nnlib.dlib -""" - - code_import_all_string = \ -""" -DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss -ResNet = nnlib.ResNet -UNet = nnlib.UNet -UNetTemporalPredictor = nnlib.UNetTemporalPredictor -NLayerDiscriminator = nnlib.NLayerDiscriminator -""" - - - @staticmethod - def _import_tf(device_config): - if nnlib.tf is not None: - return nnlib.code_import_tf - - if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': - suppressor = std_utils.suppress_stdout_stderr().__enter__() - else: - suppressor = None - - if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): - os.environ.pop('CUDA_VISIBLE_DEVICES') - - os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #tf log errors only - import tensorflow as tf - nnlib.tf = tf - - if device_config.cpu_only: - config = tf.ConfigProto(device_count={'GPU': 0}) - else: - config = tf.ConfigProto() - - if device_config.backend != "tensorflow-generic": - #tensorflow-generic is system with NVIDIA card, but w/o NVSMI - #so dont hide devices and let tensorflow to choose best card - visible_device_list = '' - for idx in device_config.gpu_idxs: - visible_device_list += str(idx) + ',' - config.gpu_options.visible_device_list=visible_device_list[:-1] - - config.gpu_options.force_gpu_compatible = True - config.gpu_options.allow_growth = device_config.allow_growth - - nnlib.tf_sess = tf.Session(config=config) - - if suppressor is not None: - suppressor.__exit__() - - @staticmethod - def import_keras(device_config): - if nnlib.keras is not None: - return nnlib.code_import_keras - - nnlib.backend = device_config.backend - if "tensorflow" in nnlib.backend: - nnlib._import_tf(device_config) - elif nnlib.backend == "plaidML": - os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" - os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] ) - - #if "tensorflow" in nnlib.backend: - # nnlib.keras = nnlib.tf.keras - #else: - import keras as keras_ - nnlib.keras = keras_ - - if 'KERAS_BACKEND' in os.environ: - os.environ.pop('KERAS_BACKEND') - - if nnlib.backend == "plaidML": - import plaidml - import plaidml.tile - nnlib.PML = plaidml - nnlib.PMLK = plaidml.keras.backend - nnlib.PMLTile = plaidml.tile - - if device_config.use_fp16: - nnlib.keras.backend.set_floatx('float16') - - if "tensorflow" in nnlib.backend: - nnlib.keras.backend.set_session(nnlib.tf_sess) - - nnlib.keras.backend.set_image_data_format('channels_last') - - nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec') - nnlib.__initialize_keras_functions() - - return nnlib.code_import_keras - - @staticmethod - def __initialize_keras_functions(): - keras = nnlib.keras - K = keras.backend - KL = keras.layers - backend = nnlib.backend - - def modelify(model_functor): - def func(tensor): - return keras.models.Model (tensor, model_functor(tensor)) - return func - - nnlib.modelify = modelify - - def gaussian_blur(radius=2.0): - def gaussian(x, mu, sigma): - return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) - - def make_kernel(sigma): - kernel_size = max(3, int(2 * 2 * sigma + 1)) - mean = np.floor(0.5 * kernel_size) - kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) - np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) - kernel = 
np_kernel / np.sum(np_kernel) - return kernel - - gauss_kernel = make_kernel(radius) - gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] - - def func(input): - inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ] - - outputs = [] - for i in range(len(inputs)): - outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ] - - return K.concatenate (outputs, axis=-1) - return func - nnlib.gaussian_blur = gaussian_blur - - def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1): - if gaussian_blur_radius > 0.0: - gblur = gaussian_blur(gaussian_blur_radius) - - def sd(content, style, loss_weight): - content_nc = K.int_shape(content)[-1] - style_nc = K.int_shape(style)[-1] - if content_nc != style_nc: - raise Exception("style_loss() content_nc != style_nc") - - axes = [1,2] - c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True) - s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True) - c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5) - - mean_loss = K.sum(K.square(c_mean-s_mean)) - std_loss = K.sum(K.square(c_std-s_std)) - - return (mean_loss + std_loss) * ( loss_weight / float(content_nc) ) - - def func(target, style): - if wnd_size == 0: - if gaussian_blur_radius > 0.0: - return sd( gblur(target), gblur(style), loss_weight=loss_weight) - else: - return sd( target, style, loss_weight=loss_weight ) - else: - #currently unused - if nnlib.tf is not None: - sh = K.int_shape(target)[1] - k = (sh-wnd_size) // step_size + 1 - if gaussian_blur_radius > 0.0: - target, style = gblur(target), gblur(style) - target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - return sd( target, style, loss_weight ) - if nnlib.PML is not None: - print ("Sorry, plaidML backend does not support style_loss") - return 0 - return func - nnlib.style_loss = style_loss - - def dssim(kernel_size=11, k1=0.01, k2=0.03, max_value=1.0): - # port of tf.image.ssim to pure keras in order to work on plaidML backend. - - def func(y_true, y_pred): - ch = K.shape(y_pred)[-1] - - def _fspecial_gauss(size, sigma): - #Function to mimic the 'fspecial' gaussian MATLAB function. 
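The _fspecial_gauss helper being defined here mimics MATLAB's fspecial('gaussian') without scipy: it computes -d**2 / (2*sigma**2) for every tap, adds the row and column log-kernels with an outer sum, and lets a softmax perform the exponentiation and normalization in a single step. A minimal standalone NumPy sketch of the same construction follows (the helper name fspecial_gauss_np is hypothetical and not part of this patch):

import numpy as np

def fspecial_gauss_np(size=11, sigma=1.5):
    # signed distance of each tap from the window center
    coords = np.arange(size, dtype=np.float32) - (size - 1) / 2.0
    g = -0.5 * (coords ** 2) / (sigma ** 2)
    # outer sum of the 1-D log-kernels gives the 2-D log-kernel
    g = g.reshape(1, -1) + g.reshape(-1, 1)
    # softmax over all taps == exp() followed by normalization to sum 1
    g = np.exp(g - g.max())
    return g / g.sum()

# the default 11x11, sigma=1.5 window sums to 1, as SSIM requires
assert abs(fspecial_gauss_np().sum() - 1.0) < 1e-6

The patch then tiles this window across channels so K.depthwise_conv2d can consume it; the normalization to unit sum is what lets reducer() act as a local weighted mean in the SSIM computation.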
- coords = np.arange(0, size, dtype=K.floatx()) - coords -= (size - 1 ) / 2.0 - g = coords**2 - g *= ( -0.5 / (sigma**2) ) - g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) - g = K.constant ( np.reshape (g, (1,-1)) ) - g = K.softmax(g) - g = K.reshape (g, (size, size, 1, 1)) - g = K.tile (g, (1,1,ch,1)) - return g - - kernel = _fspecial_gauss(kernel_size,1.5) - - def reducer(x): - return K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid') - - c1 = (k1 * max_value) ** 2 - c2 = (k2 * max_value) ** 2 - - mean0 = reducer(y_true) - mean1 = reducer(y_pred) - num0 = mean0 * mean1 * 2.0 - den0 = K.square(mean0) + K.square(mean1) - luminance = (num0 + c1) / (den0 + c1) - - num1 = reducer(y_true * y_pred) * 2.0 - den1 = reducer(K.square(y_true) + K.square(y_pred)) - c2 *= 1.0 #compensation factor - cs = (num1 - num0 + c2) / (den1 - den0 + c2) - - ssim_val = K.mean(luminance * cs, axis=(-3, -2) ) - return(1.0 - ssim_val ) / 2.0 - - return func - - nnlib.dssim = dssim - - if 'tensorflow' in backend: - class PixelShuffler(keras.layers.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - return K.tf.depth_to_space(inputs, self.size[0], 'NCHW') - - elif self.data_format == 'channels_last': - return K.tf.depth_to_space(inputs, self.size[0], 'NHWC') - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - else: - class PixelShuffler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - batch_size, c, h, w = input_shape[0], K.int_shape(inputs)[1], input_shape[2], input_shape[3] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc 
= c // (rh * rw) - - out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) - out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) - out = K.reshape(out, (batch_size, oc, oh, ow)) - return out - - elif self.data_format == 'channels_last': - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) - out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = K.reshape(out, (batch_size, oh, ow, oc)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - nnlib.PixelShuffler = PixelShuffler - nnlib.SubpixelUpscaler = PixelShuffler - - class Scale(KL.Layer): - """ - GAN Custom Scal Layer - Code borrows from https://github.com/flyyufelix/cnn_finetune - """ - def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs): - self.axis = axis - self.gamma_init = keras.initializers.get(gamma_init) - self.initial_weights = weights - super(Scale, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [keras.engine.InputSpec(shape=input_shape)] - - # Compatibility with TensorFlow >= 1.0.0 - self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name)) - self.trainable_weights = [self.gamma] - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - def call(self, x, mask=None): - return self.gamma * x - - def get_config(self): - config = {"axis": self.axis} - base_config = super(Scale, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Scale = Scale - - class Adam(keras.optimizers.Optimizer): - """Adam optimizer. - - Default parameters follow those provided in the original paper. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this - algorithm from the paper "On the Convergence of Adam and - Beyond". 
- tf_cpu_mode: only for tensorflow backend - 0 - default, no changes. - 1 - allows to train x2 bigger network on same VRAM consuming RAM - 2 - allows to train x3 bigger network on same VRAM consuming RAM*2 and CPU power. - - # References - - [Adam - A Method for Stochastic Optimization] - (https://arxiv.org/abs/1412.6980v8) - - [On the Convergence of Adam and Beyond] - (https://openreview.net/forum?id=ryQu7f-RZ) - """ - - def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, - epsilon=None, decay=0., amsgrad=False, tf_cpu_mode=0, **kwargs): - super(Adam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - self.amsgrad = amsgrad - self.tf_cpu_mode = tf_cpu_mode - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - if e: e.__exit__(None, None, None) - - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - self.updates.append(K.update(vhat, vhat_t)) - if e: e.__exit__(None, None, None) - - if self.amsgrad: - p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - else: - p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p_t - - # Apply constraints. 
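The tf_cpu_mode option documented in the docstring above works by imperatively entering a tf.device("/cpu:0") scope around slot-variable creation (modes 1 and 2) and around the moment-update arithmetic (mode 2), so the m/v/vhat state lives in system RAM instead of VRAM. A minimal sketch of that placement pattern under the TensorFlow 1.x API this patch targets (the shapes and variable names are hypothetical):

import tensorflow as tf  # TF 1.x graph-mode API assumed

# enter the device scope by hand, mirroring e.__enter__() in get_updates()
e = tf.device("/cpu:0")
e.__enter__()
# optimizer slot variables are allocated on the host, saving VRAM
m = tf.Variable(tf.zeros([4096, 4096]), name="adam_m")
v = tf.Variable(tf.zeros([4096, 4096]), name="adam_v")
e.__exit__(None, None, None)

The forward pass and the gradients still run on the GPU; only the moment reads and writes cross the PCIe bus, which is the RAM-for-VRAM (and, in mode 2, CPU-time) trade the docstring describes.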
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad} - base_config = super(Adam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Adam = Adam - - def CAInitializerMP( conv_weights_list ): - #Convolution Aware Initialization https://arxiv.org/abs/1702.06295 - result = CAInitializerMPSubprocessor ( [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ], K.floatx(), K.image_data_format() ).run() - for idx, weights in result: - K.set_value ( conv_weights_list[idx], weights ) - nnlib.CAInitializerMP = CAInitializerMP - - - if backend == "plaidML": - class TileOP_ReflectionPadding2D(nnlib.PMLTile.Operation): - def __init__(self, input, w_pad, h_pad): - if K.image_data_format() == 'channels_last': - if input.shape.ndims == 4: - H, W = input.shape.dims[1:3] - if (type(H) == int and h_pad >= H) or \ - (type(W) == int and w_pad >= W): - raise ValueError("Paddings must be less than dimensions.") - - c = """ function (I[B, H, W, C] ) -> (O) {{ - WE = W + {w_pad}*2; - HE = H + {h_pad}*2; - """.format(h_pad=h_pad, w_pad=w_pad) - if w_pad > 0: - c += """ - LEFT_PAD [b, h, w , c : B, H, WE, C ] = =(I[b, h, {w_pad}-w, c]), w < {w_pad} ; - HCENTER [b, h, w , c : B, H, WE, C ] = =(I[b, h, w-{w_pad}, c]), w < W+{w_pad}-1 ; - RIGHT_PAD[b, h, w , c : B, H, WE, C ] = =(I[b, h, 2*W - (w-{w_pad}) -2, c]); - LCR = LEFT_PAD+HCENTER+RIGHT_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += "LCR = I;" - - if h_pad > 0: - c += """ - TOP_PAD [b, h, w , c : B, HE, WE, C ] = =(LCR[b, {h_pad}-h, w, c]), h < {h_pad}; - VCENTER [b, h, w , c : B, HE, WE, C ] = =(LCR[b, h-{h_pad}, w, c]), h < H+{h_pad}-1 ; - BOTTOM_PAD[b, h, w , c : B, HE, WE, C ] = =(LCR[b, 2*H - (h-{h_pad}) -2, w, c]); - TVB = TOP_PAD+VCENTER+BOTTOM_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += "TVB = LCR;" - - c += "O = TVB; }" - - inp_dims = input.shape.dims - out_dims = (inp_dims[0], inp_dims[1]+h_pad*2, inp_dims[2]+w_pad*2, inp_dims[3]) - else: - raise NotImplemented - else: - raise NotImplemented - - super(TileOP_ReflectionPadding2D, self).__init__(c, [('I', input) ], - [('O', nnlib.PMLTile.Shape(input.shape.dtype, out_dims ) )]) - - class ReflectionPadding2D(keras.layers.Layer): - def __init__(self, padding=(1, 1), **kwargs): - self.padding = tuple(padding) - self.input_spec = [keras.layers.InputSpec(ndim=4)] - super(ReflectionPadding2D, self).__init__(**kwargs) - - def compute_output_shape(self, s): - """ If you are using "channels_last" configuration""" - return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3]) - - def call(self, x, mask=None): - w_pad,h_pad = self.padding - if "tensorflow" in backend: - return K.tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT') - elif backend == "plaidML": - return TileOP_ReflectionPadding2D.function(x, self.padding[0], self.padding[1]) - else: - if K.image_data_format() == 'channels_last': - if x.shape.ndims == 4: - w = K.concatenate ([ x[:,:,w_pad:0:-1,:], - x, - x[:,:,-2:-w_pad-2:-1,:] ], axis=2 ) - h = K.concatenate ([ w[:,h_pad:0:-1,:,:], - w, - w[:,-2:-h_pad-2:-1,:,:] ], axis=1 ) - return h - else: - raise NotImplemented - else: - raise 
NotImplemented - - nnlib.ReflectionPadding2D = ReflectionPadding2D - - class Conv2D(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2D (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2D = Conv2D - - class Conv2DTranspose(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2DTranspose (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2DTranspose = Conv2DTranspose - - @staticmethod - def import_keras_contrib(device_config): - if nnlib.keras_contrib is not None: - return nnlib.code_import_keras_contrib - - import keras_contrib as keras_contrib_ - nnlib.keras_contrib = keras_contrib_ - nnlib.__initialize_keras_contrib_functions() - nnlib.code_import_keras_contrib = compile (nnlib.code_import_keras_contrib_string,'','exec') - - @staticmethod - def __initialize_keras_contrib_functions(): - pass - - @staticmethod - def import_dlib( device_config = None): - if nnlib.dlib is not None: - return nnlib.code_import_dlib - - import dlib as dlib_ - nnlib.dlib = dlib_ - if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0: - nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0]) - - nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec') - - @staticmethod - def import_all(device_config = None): - if nnlib.code_import_all is None: - if device_config is None: - device_config = nnlib.active_DeviceConfig - else: - nnlib.active_DeviceConfig = device_config - - nnlib.import_keras(device_config) - nnlib.import_keras_contrib(device_config) - nnlib.code_import_all = compile (nnlib.code_import_keras_string + '\n' - + nnlib.code_import_keras_contrib_string - + nnlib.code_import_all_string,'','exec') - nnlib.__initialize_all_functions() - - return nnlib.code_import_all - - @staticmethod - def __initialize_all_functions(): - exec (nnlib.import_keras(nnlib.active_DeviceConfig), locals(), globals()) - exec (nnlib.import_keras_contrib(nnlib.active_DeviceConfig), locals(), globals()) - - class DSSIMMSEMaskLoss(object): - def __init__(self, mask, is_mse=False): - self.mask = mask - self.is_mse = is_mse - def __call__(self,y_true, y_pred): - total_loss = None - mask = self.mask - if self.is_mse: - blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask) - return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) ) - else: - return 10*dssim() (y_true*mask, y_pred*mask) - nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss - - - ''' - def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 
0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def func(input): - - - def ResnetBlock(dim): - def func(input): - x = input - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - return Add()([x,input]) - return func - - x = input - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(ngf, 7, 1, 'valid')(x) - - x = ReLU()(XNormalization(Conv2D(ngf*2, 4, 2, 'same')(x))) - x = ReLU()(XNormalization(Conv2D(ngf*4, 4, 2, 'same')(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4)(x) - - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf*2 *4, 3, 1, 'same')(x)))) - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf *4, 3, 1, 'same')(x)))) - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(output_nc, 7, 1, 'valid')(x) - x = tanh(x) - - return x - - return func - - nnlib.ResNet = ResNet - - # Defines the Unet generator. - # |num_downs|: number of downsamplings in UNet. 
For example, - # if |num_downs| == 7, image of size 128x128 will become of size 1x1 - # at the bottleneck - def UNet(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): - def func(inp): - x = inp - - x = Conv2D(inner_nc, 4, 2, 'valid')(ReflectionPadding2D( (1,1) )(x)) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - x = sub_model(x) - - if not outermost: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - if use_dropout: - x = Dropout(0.5)(x) - - x = Concatenate(axis=3)([inp, x]) - else: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = tanh(x) - - - return x - - return func - - def func(input): - - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=None, innermost=True) - - for i in range(num_downs - 5): - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=unet_block, use_dropout=use_dropout) - - unet_block = UNetSkipConnection(ngf * 4 , ngf * 8, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf * 2 , ngf * 4, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf , ngf * 2, sub_model=unet_block) - unet_block = UNetSkipConnection(output_nc, ngf , sub_model=unet_block, outermost=True) - - return unet_block(input) - return func - nnlib.UNet = UNet - - #predicts based on two 
past_image_tensors - def UNetTemporalPredictor(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - def func(inputs): - past_2_image_tensor, past_1_image_tensor = inputs - - x = Concatenate(axis=3)([ past_2_image_tensor, past_1_image_tensor ]) - x = UNet(3, use_batch_norm, num_downs=num_downs, ngf=ngf, use_dropout=use_dropout) (x) - - return x - - return func - nnlib.UNetTemporalPredictor = UNetTemporalPredictor - - def NLayerDiscriminator(use_batch_norm, ndf=64, n_layers=3): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def func(input): - x = input - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf, 4, 2, 'valid')(x) - x = LeakyReLU(0.2)(x) - - for i in range(1, n_layers): - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** i, 8), 4, 2, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** n_layers, 8), 4, 1, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return Conv2D( 1, 4, 1, 'valid')(x) - return func - nnlib.NLayerDiscriminator = NLayerDiscriminator - ''' - @staticmethod - def finalize_all(): - if nnlib.keras_contrib is not None: - nnlib.keras_contrib = None - - if nnlib.keras is not None: - nnlib.keras.backend.clear_session() - nnlib.keras = None - - if nnlib.tf is not None: - nnlib.tf_sess = None - nnlib.tf = None - - -class CAInitializerMPSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.floatx = client_dict['floatx'] - self.data_format = client_dict['data_format'] - - #override - def process_data(self, data): - idx, shape = data - weights = CAGenerateWeights (shape, self.floatx, self.data_format) - return idx, weights - - #override - def get_data_name (self, data): - #return string identificator of your data - return "undefined" - - #override - def __init__(self, idx_shapes_list, floatx, data_format ): - - self.idx_shapes_list = idx_shapes_list - self.floatx = floatx - self.data_format = data_format - - self.result = [] - super().__init__('CAInitializerMP', CAInitializerMPSubprocessor.Cli) - - #override - def on_clients_initialized(self): - io.progress_bar ("Initializing CA weights", len (self.idx_shapes_list)) - - #override - def 
on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(multiprocessing.cpu_count()): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'floatx' : self.floatx, - 'data_format' : self.data_format - } - - #override - def get_data(self, host_dict): - if len (self.idx_shapes_list) > 0: - return self.idx_shapes_list.pop(0) - - return None - - #override - def on_data_return (self, host_dict, data): - self.idx_shapes_list.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - self.result.append ( result ) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result +import os +import sys +import contextlib +import numpy as np + +from .CAInitializer import CAGenerateWeights +import multiprocessing +from joblib import Subprocessor + +from utils import std_utils +from .device import device +from interact import interact as io + +class nnlib(object): + device = device #forwards nnlib.devicelib to device in order to use nnlib as standalone lib + DeviceConfig = device.Config + active_DeviceConfig = DeviceConfig() #default is one best GPU + + backend = "" + + dlib = None + + keras = None + keras_contrib = None + + tf = None + tf_sess = None + + PML = None + PMLK = None + PMLTile= None + + code_import_keras = None + code_import_keras_contrib = None + code_import_all = None + + code_import_dlib = None + + + ResNet = None + UNet = None + UNetTemporalPredictor = None + NLayerDiscriminator = None + + code_import_keras_string = \ +""" +keras = nnlib.keras +K = keras.backend +KL = keras.layers + +Input = KL.Input + +Dense = KL.Dense +Conv2D = nnlib.Conv2D +Conv2DTranspose = nnlib.Conv2DTranspose +SeparableConv2D = KL.SeparableConv2D +MaxPooling2D = KL.MaxPooling2D +UpSampling2D = KL.UpSampling2D +BatchNormalization = KL.BatchNormalization + +LeakyReLU = KL.LeakyReLU +ReLU = KL.ReLU +PReLU = KL.PReLU +tanh = KL.Activation('tanh') +sigmoid = KL.Activation('sigmoid') +Dropout = KL.Dropout +Softmax = KL.Softmax + +Lambda = KL.Lambda +Add = KL.Add +Concatenate = KL.Concatenate + + +Flatten = KL.Flatten +Reshape = KL.Reshape + +ZeroPadding2D = KL.ZeroPadding2D + +RandomNormal = keras.initializers.RandomNormal +Model = keras.models.Model + +Adam = nnlib.Adam + +modelify = nnlib.modelify +gaussian_blur = nnlib.gaussian_blur +style_loss = nnlib.style_loss +dssim = nnlib.dssim + +PixelShuffler = nnlib.PixelShuffler +SubpixelUpscaler = nnlib.SubpixelUpscaler +Scale = nnlib.Scale + +CAInitializerMP = nnlib.CAInitializerMP + +#ReflectionPadding2D = nnlib.ReflectionPadding2D +#AddUniformNoise = nnlib.AddUniformNoise +""" + code_import_keras_contrib_string = \ +""" +keras_contrib = nnlib.keras_contrib +GroupNormalization = keras_contrib.layers.GroupNormalization +InstanceNormalization = keras_contrib.layers.InstanceNormalization +""" + code_import_dlib_string = \ +""" +dlib = nnlib.dlib +""" + + code_import_all_string = \ +""" +DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss +ResNet = nnlib.ResNet +UNet = nnlib.UNet +UNetTemporalPredictor = nnlib.UNetTemporalPredictor +NLayerDiscriminator = nnlib.NLayerDiscriminator +""" + + + @staticmethod + def _import_tf(device_config): + if nnlib.tf is not None: + return nnlib.code_import_tf + + if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': + suppressor = std_utils.suppress_stdout_stderr().__enter__() + else: + suppressor = None + + if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): + 
os.environ.pop('CUDA_VISIBLE_DEVICES') + + os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #tf log errors only + import tensorflow as tf + nnlib.tf = tf + + if device_config.cpu_only: + config = tf.ConfigProto(device_count={'GPU': 0}) + else: + config = tf.ConfigProto() + + if device_config.backend != "tensorflow-generic": + #tensorflow-generic is system with NVIDIA card, but w/o NVSMI + #so dont hide devices and let tensorflow to choose best card + visible_device_list = '' + for idx in device_config.gpu_idxs: + visible_device_list += str(idx) + ',' + config.gpu_options.visible_device_list=visible_device_list[:-1] + + config.gpu_options.force_gpu_compatible = True + config.gpu_options.allow_growth = device_config.allow_growth + + nnlib.tf_sess = tf.Session(config=config) + + if suppressor is not None: + suppressor.__exit__() + + @staticmethod + def import_keras(device_config): + if nnlib.keras is not None: + return nnlib.code_import_keras + + nnlib.backend = device_config.backend + if "tensorflow" in nnlib.backend: + nnlib._import_tf(device_config) + elif nnlib.backend == "plaidML": + os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" + os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] ) + + #if "tensorflow" in nnlib.backend: + # nnlib.keras = nnlib.tf.keras + #else: + import keras as keras_ + nnlib.keras = keras_ + + if 'KERAS_BACKEND' in os.environ: + os.environ.pop('KERAS_BACKEND') + + if nnlib.backend == "plaidML": + import plaidml + import plaidml.tile + nnlib.PML = plaidml + nnlib.PMLK = plaidml.keras.backend + nnlib.PMLTile = plaidml.tile + + if device_config.use_fp16: + nnlib.keras.backend.set_floatx('float16') + + if "tensorflow" in nnlib.backend: + nnlib.keras.backend.set_session(nnlib.tf_sess) + + nnlib.keras.backend.set_image_data_format('channels_last') + + nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec') + nnlib.__initialize_keras_functions() + + return nnlib.code_import_keras + + @staticmethod + def __initialize_keras_functions(): + keras = nnlib.keras + K = keras.backend + KL = keras.layers + backend = nnlib.backend + + def modelify(model_functor): + def func(tensor): + return keras.models.Model (tensor, model_functor(tensor)) + return func + + nnlib.modelify = modelify + + def gaussian_blur(radius=2.0): + def gaussian(x, mu, sigma): + return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) + + def make_kernel(sigma): + kernel_size = max(3, int(2 * 2 * sigma + 1)) + mean = np.floor(0.5 * kernel_size) + kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) + np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) + kernel = np_kernel / np.sum(np_kernel) + return kernel + + gauss_kernel = make_kernel(radius) + gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] + + def func(input): + inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ] + + outputs = [] + for i in range(len(inputs)): + outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ] + + return K.concatenate (outputs, axis=-1) + return func + nnlib.gaussian_blur = gaussian_blur + + def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1): + if gaussian_blur_radius > 0.0: + gblur = gaussian_blur(gaussian_blur_radius) + + def sd(content, style, loss_weight): + content_nc = K.int_shape(content)[-1] + style_nc = K.int_shape(style)[-1] + if content_nc != 
style_nc: + raise Exception("style_loss() content_nc != style_nc") + + axes = [1,2] + c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True) + s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True) + c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5) + + mean_loss = K.sum(K.square(c_mean-s_mean)) + std_loss = K.sum(K.square(c_std-s_std)) + + return (mean_loss + std_loss) * ( loss_weight / float(content_nc) ) + + def func(target, style): + if wnd_size == 0: + if gaussian_blur_radius > 0.0: + return sd( gblur(target), gblur(style), loss_weight=loss_weight) + else: + return sd( target, style, loss_weight=loss_weight ) + else: + #currently unused + if nnlib.tf is not None: + sh = K.int_shape(target)[1] + k = (sh-wnd_size) // step_size + 1 + if gaussian_blur_radius > 0.0: + target, style = gblur(target), gblur(style) + target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') + style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') + return sd( target, style, loss_weight ) + if nnlib.PML is not None: + print ("Sorry, plaidML backend does not support style_loss") + return 0 + return func + nnlib.style_loss = style_loss + + def dssim(kernel_size=11, k1=0.01, k2=0.03, max_value=1.0): + # port of tf.image.ssim to pure keras in order to work on plaidML backend. + + def func(y_true, y_pred): + ch = K.shape(y_pred)[-1] + + def _fspecial_gauss(size, sigma): + #Function to mimic the 'fspecial' gaussian MATLAB function. + coords = np.arange(0, size, dtype=K.floatx()) + coords -= (size - 1 ) / 2.0 + g = coords**2 + g *= ( -0.5 / (sigma**2) ) + g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) + g = K.constant ( np.reshape (g, (1,-1)) ) + g = K.softmax(g) + g = K.reshape (g, (size, size, 1, 1)) + g = K.tile (g, (1,1,ch,1)) + return g + + kernel = _fspecial_gauss(kernel_size,1.5) + + def reducer(x): + return K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid') + + c1 = (k1 * max_value) ** 2 + c2 = (k2 * max_value) ** 2 + + mean0 = reducer(y_true) + mean1 = reducer(y_pred) + num0 = mean0 * mean1 * 2.0 + den0 = K.square(mean0) + K.square(mean1) + luminance = (num0 + c1) / (den0 + c1) + + num1 = reducer(y_true * y_pred) * 2.0 + den1 = reducer(K.square(y_true) + K.square(y_pred)) + c2 *= 1.0 #compensation factor + cs = (num1 - num0 + c2) / (den1 - den0 + c2) + + ssim_val = K.mean(luminance * cs, axis=(-3, -2) ) + return(1.0 - ssim_val ) / 2.0 + + return func + + nnlib.dssim = dssim + + if 'tensorflow' in backend: + class PixelShuffler(keras.layers.Layer): + def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): + super(PixelShuffler, self).__init__(**kwargs) + self.data_format = data_format + self.size = size + + def call(self, inputs): + input_shape = K.shape(inputs) + if K.int_shape(input_shape)[0] != 4: + raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) + + if self.data_format == 'channels_first': + return K.tf.depth_to_space(inputs, self.size[0], 'NCHW') + + elif self.data_format == 'channels_last': + return K.tf.depth_to_space(inputs, self.size[0], 'NHWC') + + def compute_output_shape(self, input_shape): + if len(input_shape) != 4: + raise ValueError('Inputs should have rank ' + + str(4) + + '; Received input shape:', str(input_shape)) + + if self.data_format == 'channels_first': + height = input_shape[2] * self.size[0] if 
input_shape[2] is not None else None + width = input_shape[3] * self.size[1] if input_shape[3] is not None else None + channels = input_shape[1] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[1]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + channels, + height, + width) + + elif self.data_format == 'channels_last': + height = input_shape[1] * self.size[0] if input_shape[1] is not None else None + width = input_shape[2] * self.size[1] if input_shape[2] is not None else None + channels = input_shape[3] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[3]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + height, + width, + channels) + + def get_config(self): + config = {'size': self.size, + 'data_format': self.data_format} + base_config = super(PixelShuffler, self).get_config() + + return dict(list(base_config.items()) + list(config.items())) + else: + class PixelShuffler(KL.Layer): + def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): + super(PixelShuffler, self).__init__(**kwargs) + self.data_format = data_format + self.size = size + + def call(self, inputs): + + input_shape = K.shape(inputs) + if K.int_shape(input_shape)[0] != 4: + raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) + + if self.data_format == 'channels_first': + batch_size, c, h, w = input_shape[0], K.int_shape(inputs)[1], input_shape[2], input_shape[3] + rh, rw = self.size + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + + out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) + out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) + out = K.reshape(out, (batch_size, oc, oh, ow)) + return out + + elif self.data_format == 'channels_last': + batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] + rh, rw = self.size + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + + out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) + out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) + out = K.reshape(out, (batch_size, oh, ow, oc)) + return out + + def compute_output_shape(self, input_shape): + if len(input_shape) != 4: + raise ValueError('Inputs should have rank ' + + str(4) + + '; Received input shape:', str(input_shape)) + + if self.data_format == 'channels_first': + height = input_shape[2] * self.size[0] if input_shape[2] is not None else None + width = input_shape[3] * self.size[1] if input_shape[3] is not None else None + channels = input_shape[1] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[1]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + channels, + height, + width) + + elif self.data_format == 'channels_last': + height = input_shape[1] * self.size[0] if input_shape[1] is not None else None + width = input_shape[2] * self.size[1] if input_shape[2] is not None else None + channels = input_shape[3] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[3]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + height, + width, + channels) + + def get_config(self): + config = {'size': self.size, + 'data_format': self.data_format} + base_config = super(PixelShuffler, self).get_config() + + return dict(list(base_config.items()) + list(config.items())) + + 
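# Editorial sketch, not part of the patch: both PixelShuffler branches
# implement the same NHWC rearrangement, the TF one via depth_to_space and
# the plaidML one via reshape/permute. A minimal NumPy check of that
# equivalence follows; the helper name pixel_shuffle_nhwc is ours, and it
# assumes rank-4 input with a square upscale factor r.
import numpy as np

def pixel_shuffle_nhwc(x, r):
    # (B, H, W, C) with C divisible by r*r  ->  (B, H*r, W*r, C // (r*r))
    b, h, w, c = x.shape
    oc = c // (r * r)
    x = x.reshape(b, h, w, r, r, oc)       # split channels into (rh, rw, oc)
    x = x.transpose(0, 1, 3, 2, 4, 5)      # interleave rows/cols: (b, h, rh, w, rw, oc)
    return x.reshape(b, h * r, w * r, oc)

x = np.arange(2 * 4 * 4 * 12, dtype=np.float32).reshape(2, 4, 4, 12)
assert pixel_shuffle_nhwc(x, 2).shape == (2, 8, 8, 3)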
nnlib.PixelShuffler = PixelShuffler
+        nnlib.SubpixelUpscaler = PixelShuffler
+
+        class Scale(KL.Layer):
+            """
+            GAN custom Scale layer
+            Code borrows from https://github.com/flyyufelix/cnn_finetune
+            """
+            def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs):
+                self.axis = axis
+                self.gamma_init = keras.initializers.get(gamma_init)
+                self.initial_weights = weights
+                super(Scale, self).__init__(**kwargs)
+
+            def build(self, input_shape):
+                self.input_spec = [keras.engine.InputSpec(shape=input_shape)]
+
+                # Compatibility with TensorFlow >= 1.0.0
+                self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name))
+                self.trainable_weights = [self.gamma]
+
+                if self.initial_weights is not None:
+                    self.set_weights(self.initial_weights)
+                    del self.initial_weights
+
+            def call(self, x, mask=None):
+                return self.gamma * x
+
+            def get_config(self):
+                config = {"axis": self.axis}
+                base_config = super(Scale, self).get_config()
+                return dict(list(base_config.items()) + list(config.items()))
+        nnlib.Scale = Scale
+
+        class Adam(keras.optimizers.Optimizer):
+            """Adam optimizer.
+
+            Default parameters follow those provided in the original paper.
+
+            # Arguments
+                lr: float >= 0. Learning rate.
+                beta_1: float, 0 < beta < 1. Generally close to 1.
+                beta_2: float, 0 < beta < 1. Generally close to 1.
+                epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
+                decay: float >= 0. Learning rate decay over each update.
+                amsgrad: boolean. Whether to apply the AMSGrad variant of this
+                    algorithm from the paper "On the Convergence of Adam and
+                    Beyond".
+                tf_cpu_mode: TensorFlow backend only.
+                    0 - default, no changes.
+                    1 - allows training a ~2x bigger network on the same VRAM, at the cost of host RAM.
+                    2 - allows training a ~3x bigger network on the same VRAM, at the cost of twice the host RAM plus CPU time.
+
+            # References
+                - [Adam - A Method for Stochastic Optimization]
+                  (https://arxiv.org/abs/1412.6980v8)
+                - [On the Convergence of Adam and Beyond]
+                  (https://openreview.net/forum?id=ryQu7f-RZ)
+            """
+
+            def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
+                         epsilon=None, decay=0., amsgrad=False, tf_cpu_mode=0, **kwargs):
+                super(Adam, self).__init__(**kwargs)
+                with K.name_scope(self.__class__.__name__):
+                    self.iterations = K.variable(0, dtype='int64', name='iterations')
+                    self.lr = K.variable(lr, name='lr')
+                    self.beta_1 = K.variable(beta_1, name='beta_1')
+                    self.beta_2 = K.variable(beta_2, name='beta_2')
+                    self.decay = K.variable(decay, name='decay')
+                if epsilon is None:
+                    epsilon = K.epsilon()
+                self.epsilon = epsilon
+                self.initial_decay = decay
+                self.amsgrad = amsgrad
+                self.tf_cpu_mode = tf_cpu_mode
+
+            def get_updates(self, loss, params):
+                grads = self.get_gradients(loss, params)
+                self.updates = [K.update_add(self.iterations, 1)]
+
+                lr = self.lr
+                if self.initial_decay > 0:
+                    lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
+                                                              K.dtype(self.decay))))
+
+                t = K.cast(self.iterations, K.floatx()) + 1
+                lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
+                             (1. 
- K.pow(self.beta_1, t))) + + e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None + if e: e.__enter__() + ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] + vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] + if self.amsgrad: + vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] + else: + vhats = [K.zeros(1) for _ in params] + if e: e.__exit__(None, None, None) + + self.weights = [self.iterations] + ms + vs + vhats + + for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): + e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None + if e: e.__enter__() + m_t = (self.beta_1 * m) + (1. - self.beta_1) * g + v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) + + if self.amsgrad: + vhat_t = K.maximum(vhat, v_t) + self.updates.append(K.update(vhat, vhat_t)) + if e: e.__exit__(None, None, None) + + if self.amsgrad: + p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) + else: + p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) + + self.updates.append(K.update(m, m_t)) + self.updates.append(K.update(v, v_t)) + new_p = p_t + + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + + self.updates.append(K.update(p, new_p)) + return self.updates + + def get_config(self): + config = {'lr': float(K.get_value(self.lr)), + 'beta_1': float(K.get_value(self.beta_1)), + 'beta_2': float(K.get_value(self.beta_2)), + 'decay': float(K.get_value(self.decay)), + 'epsilon': self.epsilon, + 'amsgrad': self.amsgrad} + base_config = super(Adam, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + nnlib.Adam = Adam + + def CAInitializerMP( conv_weights_list ): + #Convolution Aware Initialization https://arxiv.org/abs/1702.06295 + result = CAInitializerMPSubprocessor ( [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ], K.floatx(), K.image_data_format() ).run() + for idx, weights in result: + K.set_value ( conv_weights_list[idx], weights ) + nnlib.CAInitializerMP = CAInitializerMP + + + if backend == "plaidML": + class TileOP_ReflectionPadding2D(nnlib.PMLTile.Operation): + def __init__(self, input, w_pad, h_pad): + if K.image_data_format() == 'channels_last': + if input.shape.ndims == 4: + H, W = input.shape.dims[1:3] + if (type(H) == int and h_pad >= H) or \ + (type(W) == int and w_pad >= W): + raise ValueError("Paddings must be less than dimensions.") + + c = """ function (I[B, H, W, C] ) -> (O) {{ + WE = W + {w_pad}*2; + HE = H + {h_pad}*2; + """.format(h_pad=h_pad, w_pad=w_pad) + if w_pad > 0: + c += """ + LEFT_PAD [b, h, w , c : B, H, WE, C ] = =(I[b, h, {w_pad}-w, c]), w < {w_pad} ; + HCENTER [b, h, w , c : B, H, WE, C ] = =(I[b, h, w-{w_pad}, c]), w < W+{w_pad}-1 ; + RIGHT_PAD[b, h, w , c : B, H, WE, C ] = =(I[b, h, 2*W - (w-{w_pad}) -2, c]); + LCR = LEFT_PAD+HCENTER+RIGHT_PAD; + """.format(h_pad=h_pad, w_pad=w_pad) + else: + c += "LCR = I;" + + if h_pad > 0: + c += """ + TOP_PAD [b, h, w , c : B, HE, WE, C ] = =(LCR[b, {h_pad}-h, w, c]), h < {h_pad}; + VCENTER [b, h, w , c : B, HE, WE, C ] = =(LCR[b, h-{h_pad}, w, c]), h < H+{h_pad}-1 ; + BOTTOM_PAD[b, h, w , c : B, HE, WE, C ] = =(LCR[b, 2*H - (h-{h_pad}) -2, w, c]); + TVB = TOP_PAD+VCENTER+BOTTOM_PAD; + """.format(h_pad=h_pad, w_pad=w_pad) + else: + c += "TVB = LCR;" + + c += "O = TVB; }" + + inp_dims = input.shape.dims + out_dims = (inp_dims[0], inp_dims[1]+h_pad*2, inp_dims[2]+w_pad*2, inp_dims[3]) + else: + raise NotImplemented + else: + raise NotImplemented + + 
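# Reference for the Tile program built above (a hedged NumPy analogue,
# editorial and not part of the patch): it mirror-pads without repeating
# the border pixel, i.e. for NHWC input it behaves like
#   np.pad(img, ((0, 0), (h_pad, h_pad), (w_pad, w_pad), (0, 0)), mode='reflect')
# LEFT_PAD/RIGHT_PAD read I at columns {w_pad}-w and 2*W-(w-{w_pad})-2,
# which are exactly the reflected indices, and TOP_PAD/BOTTOM_PAD do the
# same for rows.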
super(TileOP_ReflectionPadding2D, self).__init__(c, [('I', input) ], + [('O', nnlib.PMLTile.Shape(input.shape.dtype, out_dims ) )]) + + class ReflectionPadding2D(keras.layers.Layer): + def __init__(self, padding=(1, 1), **kwargs): + self.padding = tuple(padding) + self.input_spec = [keras.layers.InputSpec(ndim=4)] + super(ReflectionPadding2D, self).__init__(**kwargs) + + def compute_output_shape(self, s): + """ If you are using "channels_last" configuration""" + return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3]) + + def call(self, x, mask=None): + w_pad,h_pad = self.padding + if "tensorflow" in backend: + return K.tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT') + elif backend == "plaidML": + return TileOP_ReflectionPadding2D.function(x, self.padding[0], self.padding[1]) + else: + if K.image_data_format() == 'channels_last': + if x.shape.ndims == 4: + w = K.concatenate ([ x[:,:,w_pad:0:-1,:], + x, + x[:,:,-2:-w_pad-2:-1,:] ], axis=2 ) + h = K.concatenate ([ w[:,h_pad:0:-1,:,:], + w, + w[:,-2:-h_pad-2:-1,:,:] ], axis=1 ) + return h + else: + raise NotImplemented + else: + raise NotImplemented + + nnlib.ReflectionPadding2D = ReflectionPadding2D + + class Conv2D(): + def __init__ (self, *args, **kwargs): + self.reflect_pad = False + padding = kwargs.get('padding','') + if padding == 'zero': + kwargs['padding'] = 'same' + if padding == 'reflect': + kernel_size = kwargs['kernel_size'] + if (kernel_size % 2) == 1: + self.pad = (kernel_size // 2,)*2 + kwargs['padding'] = 'valid' + self.reflect_pad = True + self.func = keras.layers.Conv2D (*args, **kwargs) + + def __call__(self,x): + if self.reflect_pad: + x = ReflectionPadding2D( self.pad ) (x) + return self.func(x) + nnlib.Conv2D = Conv2D + + class Conv2DTranspose(): + def __init__ (self, *args, **kwargs): + self.reflect_pad = False + padding = kwargs.get('padding','') + if padding == 'zero': + kwargs['padding'] = 'same' + if padding == 'reflect': + kernel_size = kwargs['kernel_size'] + if (kernel_size % 2) == 1: + self.pad = (kernel_size // 2,)*2 + kwargs['padding'] = 'valid' + self.reflect_pad = True + self.func = keras.layers.Conv2DTranspose (*args, **kwargs) + + def __call__(self,x): + if self.reflect_pad: + x = ReflectionPadding2D( self.pad ) (x) + return self.func(x) + nnlib.Conv2DTranspose = Conv2DTranspose + + @staticmethod + def import_keras_contrib(device_config): + if nnlib.keras_contrib is not None: + return nnlib.code_import_keras_contrib + + import keras_contrib as keras_contrib_ + nnlib.keras_contrib = keras_contrib_ + nnlib.__initialize_keras_contrib_functions() + nnlib.code_import_keras_contrib = compile (nnlib.code_import_keras_contrib_string,'','exec') + + @staticmethod + def __initialize_keras_contrib_functions(): + pass + + @staticmethod + def import_dlib( device_config = None): + if nnlib.dlib is not None: + return nnlib.code_import_dlib + + import dlib as dlib_ + nnlib.dlib = dlib_ + if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0: + nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0]) + + nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec') + + @staticmethod + def import_all(device_config = None): + if nnlib.code_import_all is None: + if device_config is None: + device_config = nnlib.active_DeviceConfig + else: + nnlib.active_DeviceConfig = device_config + + nnlib.import_keras(device_config) + nnlib.import_keras_contrib(device_config) + nnlib.code_import_all = compile 
(nnlib.code_import_keras_string + '\n' + + nnlib.code_import_keras_contrib_string + + nnlib.code_import_all_string,'','exec') + nnlib.__initialize_all_functions() + + return nnlib.code_import_all + + @staticmethod + def __initialize_all_functions(): + exec (nnlib.import_keras(nnlib.active_DeviceConfig), locals(), globals()) + exec (nnlib.import_keras_contrib(nnlib.active_DeviceConfig), locals(), globals()) + + class DSSIMMSEMaskLoss(object): + def __init__(self, mask, is_mse=False): + self.mask = mask + self.is_mse = is_mse + def __call__(self,y_true, y_pred): + total_loss = None + mask = self.mask + if self.is_mse: + blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask) + return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) ) + else: + return 10*dssim() (y_true*mask, y_pred*mask) + nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss + + + ''' + def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) + + def func(input): + + + def ResnetBlock(dim): + def func(input): + x = input + + x = ReflectionPadding2D((1,1))(x) + x = Conv2D(dim, 3, 1, padding='valid')(x) + x = XNormalization(x) + x = ReLU()(x) + + if use_dropout: + x = Dropout(0.5)(x) + + x = ReflectionPadding2D((1,1))(x) + x = Conv2D(dim, 3, 1, padding='valid')(x) + x = XNormalization(x) + x = ReLU()(x) + return Add()([x,input]) + return func + + x = input + + x = ReflectionPadding2D((3,3))(x) + x = Conv2D(ngf, 7, 1, 'valid')(x) 
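# Shape walk-through of this disabled reference generator: the reflection-
# padded 7x7 stem above keeps H x W, the two stride-2 convs below reduce
# it to H/4 x W/4 for the n_blocks residual blocks, and the two
# PixelShuffler x2 stages plus the final 7x7/tanh head restore the input
# resolution.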
+ + x = ReLU()(XNormalization(Conv2D(ngf*2, 4, 2, 'same')(x))) + x = ReLU()(XNormalization(Conv2D(ngf*4, 4, 2, 'same')(x))) + + for i in range(n_blocks): + x = ResnetBlock(ngf*4)(x) + + x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf*2 *4, 3, 1, 'same')(x)))) + x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf *4, 3, 1, 'same')(x)))) + + x = ReflectionPadding2D((3,3))(x) + x = Conv2D(output_nc, 7, 1, 'valid')(x) + x = tanh(x) + + return x + + return func + + nnlib.ResNet = ResNet + + # Defines the Unet generator. + # |num_downs|: number of downsamplings in UNet. For example, + # if |num_downs| == 7, image of size 128x128 will become of size 1x1 + # at the bottleneck + def UNet(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) + + def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): + def func(inp): + x = inp + + x = Conv2D(inner_nc, 4, 2, 'valid')(ReflectionPadding2D( (1,1) )(x)) + x = XNormalization(x) + x = ReLU()(x) + + if not innermost: + x = sub_model(x) + + if not outermost: + x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) + x = XNormalization(x) + x = ReLU()(x) + + if not innermost: + if use_dropout: + x = Dropout(0.5)(x) + + x = Concatenate(axis=3)([inp, x]) + else: + x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) + x = tanh(x) + + + return x + + return func + + def func(input): + + unet_block = UNetSkipConnection(ngf * 8, ngf * 8, 
sub_model=None, innermost=True) + + for i in range(num_downs - 5): + unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=unet_block, use_dropout=use_dropout) + + unet_block = UNetSkipConnection(ngf * 4 , ngf * 8, sub_model=unet_block) + unet_block = UNetSkipConnection(ngf * 2 , ngf * 4, sub_model=unet_block) + unet_block = UNetSkipConnection(ngf , ngf * 2, sub_model=unet_block) + unet_block = UNetSkipConnection(output_nc, ngf , sub_model=unet_block, outermost=True) + + return unet_block(input) + return func + nnlib.UNet = UNet + + #predicts based on two past_image_tensors + def UNetTemporalPredictor(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + def func(inputs): + past_2_image_tensor, past_1_image_tensor = inputs + + x = Concatenate(axis=3)([ past_2_image_tensor, past_1_image_tensor ]) + x = UNet(3, use_batch_norm, num_downs=num_downs, ngf=ngf, use_dropout=use_dropout) (x) + + return x + + return func + nnlib.UNetTemporalPredictor = UNetTemporalPredictor + + def NLayerDiscriminator(use_batch_norm, ndf=64, n_layers=3): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def func(input): + x = input + + x = ZeroPadding2D((1,1))(x) + x = Conv2D( ndf, 4, 2, 'valid')(x) + x = LeakyReLU(0.2)(x) + + for i in range(1, n_layers): + x = ZeroPadding2D((1,1))(x) + x = Conv2D( ndf * min(2 ** i, 8), 4, 2, 'valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = Conv2D( ndf * min(2 ** n_layers, 8), 4, 1, 'valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + return Conv2D( 1, 4, 1, 'valid')(x) + return func + nnlib.NLayerDiscriminator = NLayerDiscriminator + ''' + @staticmethod + def finalize_all(): + if nnlib.keras_contrib is not None: + nnlib.keras_contrib = None + + if nnlib.keras is not None: + nnlib.keras.backend.clear_session() + nnlib.keras = None + + if nnlib.tf is not None: + nnlib.tf_sess = None + nnlib.tf = None + + +class CAInitializerMPSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + self.floatx = client_dict['floatx'] + self.data_format = client_dict['data_format'] + + #override + def process_data(self, data): + idx, shape = data + weights = CAGenerateWeights (shape, self.floatx, self.data_format) 
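# CAGenerateWeights synthesizes one convolution-aware filter bank per
# requested shape (built in the frequency domain, per the arXiv:1702.06295
# reference cited above); returning (idx, weights) lets the parent process
# K.set_value() each result into its matching layer, so the heavy
# per-filter math runs in the CPU worker pool instead of the main process.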
+ return idx, weights + + #override + def get_data_name (self, data): + #return string identificator of your data + return "undefined" + + #override + def __init__(self, idx_shapes_list, floatx, data_format ): + + self.idx_shapes_list = idx_shapes_list + self.floatx = floatx + self.data_format = data_format + + self.result = [] + super().__init__('CAInitializerMP', CAInitializerMPSubprocessor.Cli) + + #override + def on_clients_initialized(self): + io.progress_bar ("Initializing CA weights", len (self.idx_shapes_list)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def process_info_generator(self): + for i in range(multiprocessing.cpu_count()): + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + 'floatx' : self.floatx, + 'data_format' : self.data_format + } + + #override + def get_data(self, host_dict): + if len (self.idx_shapes_list) > 0: + return self.idx_shapes_list.pop(0) + + return None + + #override + def on_data_return (self, host_dict, data): + self.idx_shapes_list.insert(0, data) + + #override + def on_result (self, host_dict, data, result): + self.result.append ( result ) + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result diff --git a/nnlib/pynvml.py b/nnlib/pynvml.py index 5cc5a50..5923f23 100644 --- a/nnlib/pynvml.py +++ b/nnlib/pynvml.py @@ -1,1727 +1,1727 @@ -##### -# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the NVIDIA Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-##### - -## -# Python bindings for the NVML library -## -from ctypes import * -from ctypes.util import find_library -import sys -import os -import threading -import string - -## C Type mappings ## -## Enums -_nvmlEnableState_t = c_uint -NVML_FEATURE_DISABLED = 0 -NVML_FEATURE_ENABLED = 1 - -_nvmlBrandType_t = c_uint -NVML_BRAND_UNKNOWN = 0 -NVML_BRAND_QUADRO = 1 -NVML_BRAND_TESLA = 2 -NVML_BRAND_NVS = 3 -NVML_BRAND_GRID = 4 -NVML_BRAND_GEFORCE = 5 -NVML_BRAND_COUNT = 6 - -_nvmlTemperatureThresholds_t = c_uint -NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0 -NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1 -NVML_TEMPERATURE_THRESHOLD_COUNT = 1 - -_nvmlTemperatureSensors_t = c_uint -NVML_TEMPERATURE_GPU = 0 -NVML_TEMPERATURE_COUNT = 1 - -_nvmlComputeMode_t = c_uint -NVML_COMPUTEMODE_DEFAULT = 0 -NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 -NVML_COMPUTEMODE_PROHIBITED = 2 -NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 -NVML_COMPUTEMODE_COUNT = 4 - -_nvmlMemoryLocation_t = c_uint -NVML_MEMORY_LOCATION_L1_CACHE = 0 -NVML_MEMORY_LOCATION_L2_CACHE = 1 -NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2 -NVML_MEMORY_LOCATION_REGISTER_FILE = 3 -NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4 -NVML_MEMORY_LOCATION_COUNT = 5 - -# These are deprecated, instead use _nvmlMemoryErrorType_t -_nvmlEccBitType_t = c_uint -NVML_SINGLE_BIT_ECC = 0 -NVML_DOUBLE_BIT_ECC = 1 -NVML_ECC_ERROR_TYPE_COUNT = 2 - -_nvmlEccCounterType_t = c_uint -NVML_VOLATILE_ECC = 0 -NVML_AGGREGATE_ECC = 1 -NVML_ECC_COUNTER_TYPE_COUNT = 2 - -_nvmlMemoryErrorType_t = c_uint -NVML_MEMORY_ERROR_TYPE_CORRECTED = 0 -NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1 -NVML_MEMORY_ERROR_TYPE_COUNT = 2 - -_nvmlClockType_t = c_uint -NVML_CLOCK_GRAPHICS = 0 -NVML_CLOCK_SM = 1 -NVML_CLOCK_MEM = 2 -NVML_CLOCK_COUNT = 3 - -_nvmlDriverModel_t = c_uint -NVML_DRIVER_WDDM = 0 -NVML_DRIVER_WDM = 1 - -_nvmlPstates_t = c_uint -NVML_PSTATE_0 = 0 -NVML_PSTATE_1 = 1 -NVML_PSTATE_2 = 2 -NVML_PSTATE_3 = 3 -NVML_PSTATE_4 = 4 -NVML_PSTATE_5 = 5 -NVML_PSTATE_6 = 6 -NVML_PSTATE_7 = 7 -NVML_PSTATE_8 = 8 -NVML_PSTATE_9 = 9 -NVML_PSTATE_10 = 10 -NVML_PSTATE_11 = 11 -NVML_PSTATE_12 = 12 -NVML_PSTATE_13 = 13 -NVML_PSTATE_14 = 14 -NVML_PSTATE_15 = 15 -NVML_PSTATE_UNKNOWN = 32 - -_nvmlInforomObject_t = c_uint -NVML_INFOROM_OEM = 0 -NVML_INFOROM_ECC = 1 -NVML_INFOROM_POWER = 2 -NVML_INFOROM_COUNT = 3 - -_nvmlReturn_t = c_uint -NVML_SUCCESS = 0 -NVML_ERROR_UNINITIALIZED = 1 -NVML_ERROR_INVALID_ARGUMENT = 2 -NVML_ERROR_NOT_SUPPORTED = 3 -NVML_ERROR_NO_PERMISSION = 4 -NVML_ERROR_ALREADY_INITIALIZED = 5 -NVML_ERROR_NOT_FOUND = 6 -NVML_ERROR_INSUFFICIENT_SIZE = 7 -NVML_ERROR_INSUFFICIENT_POWER = 8 -NVML_ERROR_DRIVER_NOT_LOADED = 9 -NVML_ERROR_TIMEOUT = 10 -NVML_ERROR_IRQ_ISSUE = 11 -NVML_ERROR_LIBRARY_NOT_FOUND = 12 -NVML_ERROR_FUNCTION_NOT_FOUND = 13 -NVML_ERROR_CORRUPTED_INFOROM = 14 -NVML_ERROR_GPU_IS_LOST = 15 -NVML_ERROR_RESET_REQUIRED = 16 -NVML_ERROR_OPERATING_SYSTEM = 17 -NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18 -NVML_ERROR_UNKNOWN = 999 - -_nvmlFanState_t = c_uint -NVML_FAN_NORMAL = 0 -NVML_FAN_FAILED = 1 - -_nvmlLedColor_t = c_uint -NVML_LED_COLOR_GREEN = 0 -NVML_LED_COLOR_AMBER = 1 - -_nvmlGpuOperationMode_t = c_uint -NVML_GOM_ALL_ON = 0 -NVML_GOM_COMPUTE = 1 -NVML_GOM_LOW_DP = 2 - -_nvmlPageRetirementCause_t = c_uint -NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 0 -NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 1 -NVML_PAGE_RETIREMENT_CAUSE_COUNT = 2 - -_nvmlRestrictedAPI_t = c_uint -NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0 -NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1 -NVML_RESTRICTED_API_COUNT = 
2 - -_nvmlBridgeChipType_t = c_uint -NVML_BRIDGE_CHIP_PLX = 0 -NVML_BRIDGE_CHIP_BRO4 = 1 -NVML_MAX_PHYSICAL_BRIDGE = 128 - -_nvmlValueType_t = c_uint -NVML_VALUE_TYPE_DOUBLE = 0 -NVML_VALUE_TYPE_UNSIGNED_INT = 1 -NVML_VALUE_TYPE_UNSIGNED_LONG = 2 -NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3 -NVML_VALUE_TYPE_COUNT = 4 - -_nvmlPerfPolicyType_t = c_uint -NVML_PERF_POLICY_POWER = 0 -NVML_PERF_POLICY_THERMAL = 1 -NVML_PERF_POLICY_COUNT = 2 - -_nvmlSamplingType_t = c_uint -NVML_TOTAL_POWER_SAMPLES = 0 -NVML_GPU_UTILIZATION_SAMPLES = 1 -NVML_MEMORY_UTILIZATION_SAMPLES = 2 -NVML_ENC_UTILIZATION_SAMPLES = 3 -NVML_DEC_UTILIZATION_SAMPLES = 4 -NVML_PROCESSOR_CLK_SAMPLES = 5 -NVML_MEMORY_CLK_SAMPLES = 6 -NVML_SAMPLINGTYPE_COUNT = 7 - -_nvmlPcieUtilCounter_t = c_uint -NVML_PCIE_UTIL_TX_BYTES = 0 -NVML_PCIE_UTIL_RX_BYTES = 1 -NVML_PCIE_UTIL_COUNT = 2 - -_nvmlGpuTopologyLevel_t = c_uint -NVML_TOPOLOGY_INTERNAL = 0 -NVML_TOPOLOGY_SINGLE = 10 -NVML_TOPOLOGY_MULTIPLE = 20 -NVML_TOPOLOGY_HOSTBRIDGE = 30 -NVML_TOPOLOGY_CPU = 40 -NVML_TOPOLOGY_SYSTEM = 50 - -# C preprocessor defined values -nvmlFlagDefault = 0 -nvmlFlagForce = 1 - -# buffer size -NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 -NVML_DEVICE_UUID_BUFFER_SIZE = 80 -NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 -NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 -NVML_DEVICE_NAME_BUFFER_SIZE = 64 -NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 -NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 -NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = 16 - -NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) -NVML_VALUE_NOT_AVAILABLE_uint = c_uint(-1) - -## Lib loading ## -nvmlLib = None -libLoadLock = threading.Lock() -_nvmlLib_refcount = 0 # Incremented on each nvmlInit and decremented on nvmlShutdown - -## Error Checking ## -class NVMLError(Exception): - _valClassMapping = dict() - # List of currently known error codes - _errcode_to_string = { - NVML_ERROR_UNINITIALIZED: "Uninitialized", - NVML_ERROR_INVALID_ARGUMENT: "Invalid Argument", - NVML_ERROR_NOT_SUPPORTED: "Not Supported", - NVML_ERROR_NO_PERMISSION: "Insufficient Permissions", - NVML_ERROR_ALREADY_INITIALIZED: "Already Initialized", - NVML_ERROR_NOT_FOUND: "Not Found", - NVML_ERROR_INSUFFICIENT_SIZE: "Insufficient Size", - NVML_ERROR_INSUFFICIENT_POWER: "Insufficient External Power", - NVML_ERROR_DRIVER_NOT_LOADED: "Driver Not Loaded", - NVML_ERROR_TIMEOUT: "Timeout", - NVML_ERROR_IRQ_ISSUE: "Interrupt Request Issue", - NVML_ERROR_LIBRARY_NOT_FOUND: "NVML Shared Library Not Found", - NVML_ERROR_FUNCTION_NOT_FOUND: "Function Not Found", - NVML_ERROR_CORRUPTED_INFOROM: "Corrupted infoROM", - NVML_ERROR_GPU_IS_LOST: "GPU is lost", - NVML_ERROR_RESET_REQUIRED: "GPU requires restart", - NVML_ERROR_OPERATING_SYSTEM: "The operating system has blocked the request.", - NVML_ERROR_LIB_RM_VERSION_MISMATCH: "RM has detected an NVML/RM version mismatch.", - NVML_ERROR_UNKNOWN: "Unknown Error", - } - def __new__(typ, value): - ''' - Maps value to a proper subclass of NVMLError. 
- See _extractNVMLErrorsAsClasses function for more details - ''' - if typ == NVMLError: - typ = NVMLError._valClassMapping.get(value, typ) - obj = Exception.__new__(typ) - obj.value = value - return obj - def __str__(self): - try: - if self.value not in NVMLError._errcode_to_string: - NVMLError._errcode_to_string[self.value] = str(nvmlErrorString(self.value)) - return NVMLError._errcode_to_string[self.value] - except NVMLError_Uninitialized: - return "NVML Error with code %d" % self.value - def __eq__(self, other): - return self.value == other.value - -def _extractNVMLErrorsAsClasses(): - ''' - Generates a hierarchy of classes on top of NVMLError class. - - Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate - exceptions more easily. - - NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass. - e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized - ''' - this_module = sys.modules[__name__] - nvmlErrorsNames = filter(lambda x: x.startswith("NVML_ERROR_"), dir(this_module)) - for err_name in nvmlErrorsNames: - # e.g. Turn NVML_ERROR_ALREADY_INITIALIZED into NVMLError_AlreadyInitialized - class_name = "NVMLError_" + string.capwords(err_name.replace("NVML_ERROR_", ""), "_").replace("_", "") - err_val = getattr(this_module, err_name) - def gen_new(val): - def new(typ): - obj = NVMLError.__new__(typ, val) - return obj - return new - new_error_class = type(class_name, (NVMLError,), {'__new__': gen_new(err_val)}) - new_error_class.__module__ = __name__ - setattr(this_module, class_name, new_error_class) - NVMLError._valClassMapping[err_val] = new_error_class -_extractNVMLErrorsAsClasses() - -def _nvmlCheckReturn(ret): - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - return ret - -## Function access ## -_nvmlGetFunctionPointer_cache = dict() # function pointers are cached to prevent unnecessary libLoadLock locking -def _nvmlGetFunctionPointer(name): - global nvmlLib - - if name in _nvmlGetFunctionPointer_cache: - return _nvmlGetFunctionPointer_cache[name] - - libLoadLock.acquire() - try: - # ensure library was loaded - if (nvmlLib == None): - raise NVMLError(NVML_ERROR_UNINITIALIZED) - try: - _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name) - return _nvmlGetFunctionPointer_cache[name] - except AttributeError: - raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND) - finally: - # lock is always freed - libLoadLock.release() - -## Alternative object -# Allows the object to be printed -# Allows mismatched types to be assigned -# - like None when the Structure variant requires c_uint -class nvmlFriendlyObject(object): - def __init__(self, dictionary): - for x in dictionary: - setattr(self, x, dictionary[x]) - def __str__(self): - return self.__dict__.__str__() - -def nvmlStructToFriendlyObject(struct): - d = {} - for x in struct._fields_: - key = x[0] - value = getattr(struct, key) - d[key] = value - obj = nvmlFriendlyObject(d) - return obj - -# pack the object so it can be passed to the NVML library -def nvmlFriendlyObjectToStruct(obj, model): - for x in model._fields_: - key = x[0] - value = obj.__dict__[key] - setattr(model, key, value) - return model - -## Unit structures -class struct_c_nvmlUnit_t(Structure): - pass # opaque handle -c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) - -class _PrintableStructure(Structure): - """ - Abstract class that produces nicer __str__ output than ctypes.Structure. - e.g. 
instead of: - >>> print str(obj) - - this class will print - class_name(field_name: formatted_value, field_name: formatted_value) - - _fmt_ dictionary of -> - e.g. class that has _field_ 'hex_value', c_uint could be formatted with - _fmt_ = {"hex_value" : "%08X"} - to produce nicer output. - Default fomratting string for all fields can be set with key "" like: - _fmt_ = {"" : "%d MHz"} # e.g all values are numbers in MHz. - If not set it's assumed to be just "%s" - - Exact format of returned str from this class is subject to change in the future. - """ - _fmt_ = {} - def __str__(self): - result = [] - for x in self._fields_: - key = x[0] - value = getattr(self, key) - fmt = "%s" - if key in self._fmt_: - fmt = self._fmt_[key] - elif "" in self._fmt_: - fmt = self._fmt_[""] - result.append(("%s: " + fmt) % (key, value)) - return self.__class__.__name__ + "(" + string.join(result, ", ") + ")" - -class c_nvmlUnitInfo_t(_PrintableStructure): - _fields_ = [ - ('name', c_char * 96), - ('id', c_char * 96), - ('serial', c_char * 96), - ('firmwareVersion', c_char * 96), - ] - -class c_nvmlLedState_t(_PrintableStructure): - _fields_ = [ - ('cause', c_char * 256), - ('color', _nvmlLedColor_t), - ] - -class c_nvmlPSUInfo_t(_PrintableStructure): - _fields_ = [ - ('state', c_char * 256), - ('current', c_uint), - ('voltage', c_uint), - ('power', c_uint), - ] - -class c_nvmlUnitFanInfo_t(_PrintableStructure): - _fields_ = [ - ('speed', c_uint), - ('state', _nvmlFanState_t), - ] - -class c_nvmlUnitFanSpeeds_t(_PrintableStructure): - _fields_ = [ - ('fans', c_nvmlUnitFanInfo_t * 24), - ('count', c_uint) - ] - -## Device structures -class struct_c_nvmlDevice_t(Structure): - pass # opaque handle -c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) - -class nvmlPciInfo_t(_PrintableStructure): - _fields_ = [ - ('busId', c_char * 16), - ('domain', c_uint), - ('bus', c_uint), - ('device', c_uint), - ('pciDeviceId', c_uint), - - # Added in 2.285 - ('pciSubSystemId', c_uint), - ('reserved0', c_uint), - ('reserved1', c_uint), - ('reserved2', c_uint), - ('reserved3', c_uint), - ] - _fmt_ = { - 'domain' : "0x%04X", - 'bus' : "0x%02X", - 'device' : "0x%02X", - 'pciDeviceId' : "0x%08X", - 'pciSubSystemId' : "0x%08X", - } - -class c_nvmlMemory_t(_PrintableStructure): - _fields_ = [ - ('total', c_ulonglong), - ('free', c_ulonglong), - ('used', c_ulonglong), - ] - _fmt_ = {'': "%d B"} - -class c_nvmlBAR1Memory_t(_PrintableStructure): - _fields_ = [ - ('bar1Total', c_ulonglong), - ('bar1Free', c_ulonglong), - ('bar1Used', c_ulonglong), - ] - _fmt_ = {'': "%d B"} - -# On Windows with the WDDM driver, usedGpuMemory is reported as None -# Code that processes this structure should check for None, I.E. 
-# -# if (info.usedGpuMemory == None): -# # TODO handle the error -# pass -# else: -# print("Using %d MiB of memory" % (info.usedGpuMemory / 1024 / 1024)) -# -# See NVML documentation for more information -class c_nvmlProcessInfo_t(_PrintableStructure): - _fields_ = [ - ('pid', c_uint), - ('usedGpuMemory', c_ulonglong), - ] - _fmt_ = {'usedGpuMemory': "%d B"} - -class c_nvmlBridgeChipInfo_t(_PrintableStructure): - _fields_ = [ - ('type', _nvmlBridgeChipType_t), - ('fwVersion', c_uint), - ] - -class c_nvmlBridgeChipHierarchy_t(_PrintableStructure): - _fields_ = [ - ('bridgeCount', c_uint), - ('bridgeChipInfo', c_nvmlBridgeChipInfo_t * 128), - ] - -class c_nvmlEccErrorCounts_t(_PrintableStructure): - _fields_ = [ - ('l1Cache', c_ulonglong), - ('l2Cache', c_ulonglong), - ('deviceMemory', c_ulonglong), - ('registerFile', c_ulonglong), - ] - -class c_nvmlUtilization_t(_PrintableStructure): - _fields_ = [ - ('gpu', c_uint), - ('memory', c_uint), - ] - _fmt_ = {'': "%d %%"} - -# Added in 2.285 -class c_nvmlHwbcEntry_t(_PrintableStructure): - _fields_ = [ - ('hwbcId', c_uint), - ('firmwareVersion', c_char * 32), - ] - -class c_nvmlValue_t(Union): - _fields_ = [ - ('dVal', c_double), - ('uiVal', c_uint), - ('ulVal', c_ulong), - ('ullVal', c_ulonglong), - ] - -class c_nvmlSample_t(_PrintableStructure): - _fields_ = [ - ('timeStamp', c_ulonglong), - ('sampleValue', c_nvmlValue_t), - ] - -class c_nvmlViolationTime_t(_PrintableStructure): - _fields_ = [ - ('referenceTime', c_ulonglong), - ('violationTime', c_ulonglong), - ] - -## Event structures -class struct_c_nvmlEventSet_t(Structure): - pass # opaque handle -c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) - -nvmlEventTypeSingleBitEccError = 0x0000000000000001 -nvmlEventTypeDoubleBitEccError = 0x0000000000000002 -nvmlEventTypePState = 0x0000000000000004 -nvmlEventTypeXidCriticalError = 0x0000000000000008 -nvmlEventTypeClock = 0x0000000000000010 -nvmlEventTypeNone = 0x0000000000000000 -nvmlEventTypeAll = ( - nvmlEventTypeNone | - nvmlEventTypeSingleBitEccError | - nvmlEventTypeDoubleBitEccError | - nvmlEventTypePState | - nvmlEventTypeClock | - nvmlEventTypeXidCriticalError - ) - -## Clock Throttle Reasons defines -nvmlClocksThrottleReasonGpuIdle = 0x0000000000000001 -nvmlClocksThrottleReasonApplicationsClocksSetting = 0x0000000000000002 -nvmlClocksThrottleReasonUserDefinedClocks = nvmlClocksThrottleReasonApplicationsClocksSetting # deprecated, use nvmlClocksThrottleReasonApplicationsClocksSetting -nvmlClocksThrottleReasonSwPowerCap = 0x0000000000000004 -nvmlClocksThrottleReasonHwSlowdown = 0x0000000000000008 -nvmlClocksThrottleReasonUnknown = 0x8000000000000000 -nvmlClocksThrottleReasonNone = 0x0000000000000000 -nvmlClocksThrottleReasonAll = ( - nvmlClocksThrottleReasonNone | - nvmlClocksThrottleReasonGpuIdle | - nvmlClocksThrottleReasonApplicationsClocksSetting | - nvmlClocksThrottleReasonSwPowerCap | - nvmlClocksThrottleReasonHwSlowdown | - nvmlClocksThrottleReasonUnknown - ) - -class c_nvmlEventData_t(_PrintableStructure): - _fields_ = [ - ('device', c_nvmlDevice_t), - ('eventType', c_ulonglong), - ('eventData', c_ulonglong) - ] - _fmt_ = {'eventType': "0x%08X"} - -class c_nvmlAccountingStats_t(_PrintableStructure): - _fields_ = [ - ('gpuUtilization', c_uint), - ('memoryUtilization', c_uint), - ('maxMemoryUsage', c_ulonglong), - ('time', c_ulonglong), - ('startTime', c_ulonglong), - ('isRunning', c_uint), - ('reserved', c_uint * 5) - ] - -## C function wrappers ## -def nvmlInit(): - _LoadNvmlLibrary() - - # - # Initialize the library - # - fn 
= _nvmlGetFunctionPointer("nvmlInit_v2") - ret = fn() - _nvmlCheckReturn(ret) - - # Atomically update refcount - global _nvmlLib_refcount - libLoadLock.acquire() - _nvmlLib_refcount += 1 - libLoadLock.release() - return None - -def _LoadNvmlLibrary(): - ''' - Load the library if it isn't loaded already - ''' - global nvmlLib - - if (nvmlLib == None): - # lock to ensure only one caller loads the library - libLoadLock.acquire() - - try: - # ensure the library still isn't loaded - if (nvmlLib == None): - try: - if (sys.platform[:3] == "win"): - searchPaths = [ - os.path.join(os.getenv("ProgramFiles", r"C:\Program Files"), r"NVIDIA Corporation\NVSMI\nvml.dll"), - os.path.join(os.getenv("WinDir", r"C:\Windows"), r"System32\nvml.dll"), - ] - nvmlPath = next((x for x in searchPaths if os.path.isfile(x)), None) - if (nvmlPath == None): - _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) - else: - # cdecl calling convention - nvmlLib = CDLL(nvmlPath) - else: - # assume linux - nvmlLib = CDLL("libnvidia-ml.so.1") - except OSError as ose: - _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) - if (nvmlLib == None): - _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) - finally: - # lock is always freed - libLoadLock.release() - -def nvmlShutdown(): - # - # Leave the library loaded, but shutdown the interface - # - fn = _nvmlGetFunctionPointer("nvmlShutdown") - ret = fn() - _nvmlCheckReturn(ret) - - # Atomically update refcount - global _nvmlLib_refcount - libLoadLock.acquire() - if (0 < _nvmlLib_refcount): - _nvmlLib_refcount -= 1 - libLoadLock.release() - return None - -# Added in 2.285 -def nvmlErrorString(result): - fn = _nvmlGetFunctionPointer("nvmlErrorString") - fn.restype = c_char_p # otherwise return is an int - ret = fn(result) - return ret - -# Added in 2.285 -def nvmlSystemGetNVMLVersion(): - c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetProcessName(pid): - c_name = create_string_buffer(1024) - fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") - ret = fn(c_uint(pid), c_name, c_uint(1024)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlSystemGetDriverVersion(): - c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetHicVersion(): - c_count = c_uint(0) - hics = None - fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") - - # get the count - ret = fn(byref(c_count), None) - - # this should only fail with insufficient size - if ((ret != NVML_SUCCESS) and - (ret != NVML_ERROR_INSUFFICIENT_SIZE)): - raise NVMLError(ret) - - # if there are no hics - if (c_count.value == 0): - return [] - - hic_array = c_nvmlHwbcEntry_t * c_count.value - hics = hic_array() - ret = fn(byref(c_count), hics) - _nvmlCheckReturn(ret) - return hics - -## Unit get functions -def nvmlUnitGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetHandleByIndex(index): - c_index = c_uint(index) - unit = c_nvmlUnit_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") - ret = fn(c_index, byref(unit)) - _nvmlCheckReturn(ret) - 
return unit - -def nvmlUnitGetUnitInfo(unit): - c_info = c_nvmlUnitInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetLedState(unit): - c_state = c_nvmlLedState_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") - ret = fn(unit, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state - -def nvmlUnitGetPsuInfo(unit): - c_info = c_nvmlPSUInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetTemperature(unit, type): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") - ret = fn(unit, c_uint(type), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -def nvmlUnitGetFanSpeedInfo(unit): - c_speeds = c_nvmlUnitFanSpeeds_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") - ret = fn(unit, byref(c_speeds)) - _nvmlCheckReturn(ret) - return c_speeds - -# added to API -def nvmlUnitGetDeviceCount(unit): - c_count = c_uint(0) - # query the unit to determine device count - fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), None) - if (ret == NVML_ERROR_INSUFFICIENT_SIZE): - ret = NVML_SUCCESS - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetDevices(unit): - c_count = c_uint(nvmlUnitGetDeviceCount(unit)) - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return c_devices - -## Device get functions -def nvmlDeviceGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount_v2") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetHandleByIndex(index): - c_index = c_uint(index) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex_v2") - ret = fn(c_index, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleBySerial(serial): - c_serial = c_char_p(serial) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") - ret = fn(c_serial, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByUUID(uuid): - c_uuid = c_char_p(uuid) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") - ret = fn(c_uuid, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByPciBusId(pciBusId): - c_busId = c_char_p(pciBusId) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId_v2") - ret = fn(c_busId, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetName(handle): - c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") - ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlDeviceGetBoardId(handle): - c_id = c_uint(); - fn = _nvmlGetFunctionPointer("nvmlDeviceGetBoardId") - ret = fn(handle, byref(c_id)) - _nvmlCheckReturn(ret) - return c_id.value - -def nvmlDeviceGetMultiGpuBoard(handle): - c_multiGpu = c_uint(); - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMultiGpuBoard") - ret = fn(handle, byref(c_multiGpu)) - _nvmlCheckReturn(ret) - return c_multiGpu.value - -def nvmlDeviceGetBrand(handle): - c_type = _nvmlBrandType_t() - fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetBrand") - ret = fn(handle, byref(c_type)) - _nvmlCheckReturn(ret) - return c_type.value - -def nvmlDeviceGetSerial(handle): - c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") - ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_serial.value - -def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): - affinity_array = c_ulonglong * cpuSetSize - c_affinity = affinity_array() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCpuAffinity") - ret = fn(handle, cpuSetSize, byref(c_affinity)) - _nvmlCheckReturn(ret) - return c_affinity - -def nvmlDeviceSetCpuAffinity(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetCpuAffinity") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearCpuAffinity(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearCpuAffinity") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetMinorNumber(handle): - c_minor_number = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinorNumber") - ret = fn(handle, byref(c_minor_number)) - _nvmlCheckReturn(ret) - return c_minor_number.value - -def nvmlDeviceGetUUID(handle): - c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") - ret = fn(handle, c_uuid, c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_uuid.value - -def nvmlDeviceGetInforomVersion(handle, infoRomObject): - c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") - ret = fn(handle, _nvmlInforomObject_t(infoRomObject), - c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 4.304 -def nvmlDeviceGetInforomImageVersion(handle): - c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomImageVersion") - ret = fn(handle, c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 4.304 -def nvmlDeviceGetInforomConfigurationChecksum(handle): - c_checksum = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomConfigurationChecksum") - ret = fn(handle, byref(c_checksum)) - _nvmlCheckReturn(ret) - return c_checksum.value - -# Added in 4.304 -def nvmlDeviceValidateInforom(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceValidateInforom") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetDisplayMode(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetDisplayActive(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayActive") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - - -def nvmlDeviceGetPersistenceMode(handle): - c_state = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") - ret = fn(handle, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state.value - -def nvmlDeviceGetPciInfo(handle): - c_info = nvmlPciInfo_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") - ret = fn(handle, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlDeviceGetClockInfo(handle, type): - c_clock = c_uint() - fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 2.285 -def nvmlDeviceGetMaxClockInfo(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 4.304 -def nvmlDeviceGetApplicationsClock(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetApplicationsClock") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 5.319 -def nvmlDeviceGetDefaultApplicationsClock(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDefaultApplicationsClock") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 4.304 -def nvmlDeviceGetSupportedMemoryClocks(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedMemoryClocks") - ret = fn(handle, byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no clocks - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - clocks_array = c_uint * c_count.value - c_clocks = clocks_array() - - # make the call again - ret = fn(handle, byref(c_count), c_clocks) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - procs.append(c_clocks[i]) - - return procs - else: - # error case - raise NVMLError(ret) - -# Added in 4.304 -def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedGraphicsClocks") - ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no clocks - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - clocks_array = c_uint * c_count.value - c_clocks = clocks_array() - - # make the call again - ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), c_clocks) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - procs.append(c_clocks[i]) - - return procs - else: - # error case - raise NVMLError(ret) - -def nvmlDeviceGetFanSpeed(handle): - c_speed = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") - ret = fn(handle, byref(c_speed)) - _nvmlCheckReturn(ret) - return c_speed.value - -def nvmlDeviceGetTemperature(handle, sensor): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") - ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -def nvmlDeviceGetTemperatureThreshold(handle, threshold): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperatureThreshold") - ret = fn(handle, _nvmlTemperatureThresholds_t(threshold), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -# DEPRECATED use nvmlDeviceGetPerformanceState -def nvmlDeviceGetPowerState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def nvmlDeviceGetPerformanceState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def 
nvmlDeviceGetPowerManagementMode(handle): - c_pcapMode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") - ret = fn(handle, byref(c_pcapMode)) - _nvmlCheckReturn(ret) - return c_pcapMode.value - -def nvmlDeviceGetPowerManagementLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - -# Added in 4.304 -def nvmlDeviceGetPowerManagementLimitConstraints(handle): - c_minLimit = c_uint() - c_maxLimit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimitConstraints") - ret = fn(handle, byref(c_minLimit), byref(c_maxLimit)) - _nvmlCheckReturn(ret) - return [c_minLimit.value, c_maxLimit.value] - -# Added in 4.304 -def nvmlDeviceGetPowerManagementDefaultLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementDefaultLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - - -# Added in 331 -def nvmlDeviceGetEnforcedPowerLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEnforcedPowerLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - -def nvmlDeviceGetPowerUsage(handle): - c_watts = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") - ret = fn(handle, byref(c_watts)) - _nvmlCheckReturn(ret) - return c_watts.value - -# Added in 4.304 -def nvmlDeviceGetGpuOperationMode(handle): - c_currState = _nvmlGpuOperationMode_t() - c_pendingState = _nvmlGpuOperationMode_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetGpuOperationMode") - ret = fn(handle, byref(c_currState), byref(c_pendingState)) - _nvmlCheckReturn(ret) - return [c_currState.value, c_pendingState.value] - -# Added in 4.304 -def nvmlDeviceGetCurrentGpuOperationMode(handle): - return nvmlDeviceGetGpuOperationMode(handle)[0] - -# Added in 4.304 -def nvmlDeviceGetPendingGpuOperationMode(handle): - return nvmlDeviceGetGpuOperationMode(handle)[1] - -def nvmlDeviceGetMemoryInfo(handle): - c_memory = c_nvmlMemory_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") - ret = fn(handle, byref(c_memory)) - _nvmlCheckReturn(ret) - return c_memory - -def nvmlDeviceGetBAR1MemoryInfo(handle): - c_bar1_memory = c_nvmlBAR1Memory_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetBAR1MemoryInfo") - ret = fn(handle, byref(c_bar1_memory)) - _nvmlCheckReturn(ret) - return c_bar1_memory - -def nvmlDeviceGetComputeMode(handle): - c_mode = _nvmlComputeMode_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetEccMode(handle): - c_currState = _nvmlEnableState_t() - c_pendingState = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") - ret = fn(handle, byref(c_currState), byref(c_pendingState)) - _nvmlCheckReturn(ret) - return [c_currState.value, c_pendingState.value] - -# added to API -def nvmlDeviceGetCurrentEccMode(handle): - return nvmlDeviceGetEccMode(handle)[0] - -# added to API -def nvmlDeviceGetPendingEccMode(handle): - return nvmlDeviceGetEccMode(handle)[1] - -def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): - c_count = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") - ret = fn(handle, _nvmlMemoryErrorType_t(errorType), - _nvmlEccCounterType_t(counterType), byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -# This is 
deprecated, instead use nvmlDeviceGetMemoryErrorCounter -def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): - c_counts = c_nvmlEccErrorCounts_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") - ret = fn(handle, _nvmlMemoryErrorType_t(errorType), - _nvmlEccCounterType_t(counterType), byref(c_counts)) - _nvmlCheckReturn(ret) - return c_counts - -# Added in 4.304 -def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, locationType): - c_count = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryErrorCounter") - ret = fn(handle, - _nvmlMemoryErrorType_t(errorType), - _nvmlEccCounterType_t(counterType), - _nvmlMemoryLocation_t(locationType), - byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetUtilizationRates(handle): - c_util = c_nvmlUtilization_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") - ret = fn(handle, byref(c_util)) - _nvmlCheckReturn(ret) - return c_util - -def nvmlDeviceGetEncoderUtilization(handle): - c_util = c_uint() - c_samplingPeriod = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEncoderUtilization") - ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) - _nvmlCheckReturn(ret) - return [c_util.value, c_samplingPeriod.value] - -def nvmlDeviceGetDecoderUtilization(handle): - c_util = c_uint() - c_samplingPeriod = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDecoderUtilization") - ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) - _nvmlCheckReturn(ret) - return [c_util.value, c_samplingPeriod.value] - -def nvmlDeviceGetPcieReplayCounter(handle): - c_replay = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieReplayCounter") - ret = fn(handle, byref(c_replay)) - _nvmlCheckReturn(ret) - return c_replay.value - -def nvmlDeviceGetDriverModel(handle): - c_currModel = _nvmlDriverModel_t() - c_pendingModel = _nvmlDriverModel_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") - ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) - _nvmlCheckReturn(ret) - return [c_currModel.value, c_pendingModel.value] - -# added to API -def nvmlDeviceGetCurrentDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[0] - -# added to API -def nvmlDeviceGetPendingDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[1] - -# Added in 2.285 -def nvmlDeviceGetVbiosVersion(handle): - c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") - ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlDeviceGetComputeRunningProcesses(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") - ret = fn(handle, byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no running processes - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - # oversize the array incase more processes are created - c_count.value = c_count.value * 2 + 5 - proc_array = c_nvmlProcessInfo_t * c_count.value - c_procs = proc_array() - - # make the call again - ret = fn(handle, byref(c_count), c_procs) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - # use an alternative struct for this object - obj = nvmlStructToFriendlyObject(c_procs[i]) - if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see 
comment above - obj.usedGpuMemory = None - procs.append(obj) - - return procs - else: - # error case - raise NVMLError(ret) - -def nvmlDeviceGetGraphicsRunningProcesses(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses") - ret = fn(handle, byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no running processes - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - # oversize the array incase more processes are created - c_count.value = c_count.value * 2 + 5 - proc_array = c_nvmlProcessInfo_t * c_count.value - c_procs = proc_array() - - # make the call again - ret = fn(handle, byref(c_count), c_procs) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - # use an alternative struct for this object - obj = nvmlStructToFriendlyObject(c_procs[i]) - if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see comment above - obj.usedGpuMemory = None - procs.append(obj) - - return procs - else: - # error case - raise NVMLError(ret) - -def nvmlDeviceGetAutoBoostedClocksEnabled(handle): - c_isEnabled = _nvmlEnableState_t() - c_defaultIsEnabled = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAutoBoostedClocksEnabled") - ret = fn(handle, byref(c_isEnabled), byref(c_defaultIsEnabled)) - _nvmlCheckReturn(ret) - return [c_isEnabled.value, c_defaultIsEnabled.value] - #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks - -## Set functions -def nvmlUnitSetLedState(unit, color): - fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") - ret = fn(unit, _nvmlLedColor_t(color)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetPersistenceMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetComputeMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") - ret = fn(handle, _nvmlComputeMode_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetEccMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearEccErrorCounts(handle, counterType): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") - ret = fn(handle, _nvmlEccCounterType_t(counterType)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetDriverModel(handle, model): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") - ret = fn(handle, _nvmlDriverModel_t(model)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetAutoBoostedClocksEnabled") - ret = fn(handle, _nvmlEnableState_t(enabled)) - _nvmlCheckReturn(ret) - return None - #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks - -def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetDefaultAutoBoostedClocksEnabled") - ret = fn(handle, _nvmlEnableState_t(enabled), c_uint(flags)) - _nvmlCheckReturn(ret) - return None - #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks - -# Added in 4.304 -def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsClockMHz): - fn = 
_nvmlGetFunctionPointer("nvmlDeviceSetApplicationsClocks") - ret = fn(handle, c_uint(maxMemClockMHz), c_uint(maxGraphicsClockMHz)) - _nvmlCheckReturn(ret) - return None - -# Added in 4.304 -def nvmlDeviceResetApplicationsClocks(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceResetApplicationsClocks") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -# Added in 4.304 -def nvmlDeviceSetPowerManagementLimit(handle, limit): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetPowerManagementLimit") - ret = fn(handle, c_uint(limit)) - _nvmlCheckReturn(ret) - return None - -# Added in 4.304 -def nvmlDeviceSetGpuOperationMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetGpuOperationMode") - ret = fn(handle, _nvmlGpuOperationMode_t(mode)) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlEventSetCreate(): - fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") - eventSet = c_nvmlEventSet_t() - ret = fn(byref(eventSet)) - _nvmlCheckReturn(ret) - return eventSet - -# Added in 2.285 -def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): - fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") - ret = fn(handle, c_ulonglong(eventTypes), eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlDeviceGetSupportedEventTypes(handle): - c_eventTypes = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") - ret = fn(handle, byref(c_eventTypes)) - _nvmlCheckReturn(ret) - return c_eventTypes.value - -# Added in 2.285 -# raises NVML_ERROR_TIMEOUT exception on timeout -def nvmlEventSetWait(eventSet, timeoutms): - fn = _nvmlGetFunctionPointer("nvmlEventSetWait") - data = c_nvmlEventData_t() - ret = fn(eventSet, byref(data), c_uint(timeoutms)) - _nvmlCheckReturn(ret) - return data - -# Added in 2.285 -def nvmlEventSetFree(eventSet): - fn = _nvmlGetFunctionPointer("nvmlEventSetFree") - ret = fn(eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 3.295 -def nvmlDeviceOnSameBoard(handle1, handle2): - fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") - onSameBoard = c_int() - ret = fn(handle1, handle2, byref(onSameBoard)) - _nvmlCheckReturn(ret) - return (onSameBoard.value != 0) - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - -# Added in 4.304 -def nvmlDeviceGetSupportedClocksThrottleReasons(handle): - c_reasons= c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons") - ret = fn(handle, byref(c_reasons)) - _nvmlCheckReturn(ret) - return c_reasons.value - -# Added in 4.304 -def nvmlDeviceGetCurrentClocksThrottleReasons(handle): - c_reasons= c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons") - 
ret = fn(handle, byref(c_reasons)) - _nvmlCheckReturn(ret) - return c_reasons.value - -# Added in 5.319 -def nvmlDeviceGetIndex(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetIndex") - c_index = c_uint() - ret = fn(handle, byref(c_index)) - _nvmlCheckReturn(ret) - return c_index.value - -# Added in 5.319 -def nvmlDeviceGetAccountingMode(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceSetAccountingMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetAccountingMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearAccountingPids(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearAccountingPids") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetAccountingStats(handle, pid): - stats = c_nvmlAccountingStats_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingStats") - ret = fn(handle, c_uint(pid), byref(stats)) - _nvmlCheckReturn(ret) - if (stats.maxMemoryUsage == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see comment above - stats.maxMemoryUsage = None - return stats - -def nvmlDeviceGetAccountingPids(handle): - count = c_uint(nvmlDeviceGetAccountingBufferSize(handle)) - pids = (c_uint * count.value)() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingPids") - ret = fn(handle, byref(count), pids) - _nvmlCheckReturn(ret) - return map(int, pids[0:count.value]) - -def nvmlDeviceGetAccountingBufferSize(handle): - bufferSize = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingBufferSize") - ret = fn(handle, byref(bufferSize)) - _nvmlCheckReturn(ret) - return int(bufferSize.value) - -def nvmlDeviceGetRetiredPages(device, sourceFilter): - c_source = _nvmlPageRetirementCause_t(sourceFilter) - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages") - - # First call will get the size - ret = fn(device, c_source, byref(c_count), None) - - # this should only fail with insufficient size - if ((ret != NVML_SUCCESS) and - (ret != NVML_ERROR_INSUFFICIENT_SIZE)): - raise NVMLError(ret) - - # call again with a buffer - # oversize the array for the rare cases where additional pages - # are retired between NVML calls - c_count.value = c_count.value * 2 + 5 - page_array = c_ulonglong * c_count.value - c_pages = page_array() - ret = fn(device, c_source, byref(c_count), c_pages) - _nvmlCheckReturn(ret) - return map(int, c_pages[0:c_count.value]) - -def nvmlDeviceGetRetiredPagesPendingStatus(device): - c_pending = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPagesPendingStatus") - ret = fn(device, byref(c_pending)) - _nvmlCheckReturn(ret) - return int(c_pending.value) - -def nvmlDeviceGetAPIRestriction(device, apiType): - c_permission = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAPIRestriction") - ret = fn(device, _nvmlRestrictedAPI_t(apiType), byref(c_permission)) - _nvmlCheckReturn(ret) - return int(c_permission.value) - -def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetAPIRestriction") - ret = fn(handle, _nvmlRestrictedAPI_t(apiType), _nvmlEnableState_t(isRestricted)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetBridgeChipInfo(handle): - bridgeHierarchy = c_nvmlBridgeChipHierarchy_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetBridgeChipInfo") - 
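# nvmlDeviceGetRetiredPages in this hunk is one instance of the file's standard
# two-call idiom: probe with a NULL buffer to learn the element count, pad the
# buffer ("* 2 + 5") to absorb pages retired between the two calls, then fetch.
# Below is a condensed restatement of just that idiom, same calls as the hunk;
# only the helper name _query_retired_pages is invented here. Note the original
# returns map(...), which is a lazy iterator on Python 3, and the map(None, ...)
# calls in the topology getters nearby are Python 2-only idioms.
from ctypes import byref, c_uint, c_ulonglong

def _query_retired_pages(device, cause):
    fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages")
    c_source = _nvmlPageRetirementCause_t(cause)
    c_count = c_uint(0)
    ret = fn(device, c_source, byref(c_count), None)  # size probe only
    if ret not in (NVML_SUCCESS, NVML_ERROR_INSUFFICIENT_SIZE):
        raise NVMLError(ret)
    c_count.value = c_count.value * 2 + 5             # oversize for races
    c_pages = (c_ulonglong * c_count.value)()
    _nvmlCheckReturn(fn(device, c_source, byref(c_count), c_pages))
    return [int(p) for p in c_pages[:c_count.value]]  # NVML rewrote c_count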
ret = fn(handle, byref(bridgeHierarchy)) - _nvmlCheckReturn(ret) - return bridgeHierarchy - -def nvmlDeviceGetSamples(device, sampling_type, timeStamp): - c_sampling_type = _nvmlSamplingType_t(sampling_type) - c_time_stamp = c_ulonglong(timeStamp) - c_sample_count = c_uint(0) - c_sample_value_type = _nvmlValueType_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSamples") - - ## First Call gets the size - ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), None) - - # Stop if this fails - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - - sampleArray = c_sample_count.value * c_nvmlSample_t - c_samples = sampleArray() - ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), c_samples) - _nvmlCheckReturn(ret) - return (c_sample_value_type.value, c_samples[0:c_sample_count.value]) - -def nvmlDeviceGetViolationStatus(device, perfPolicyType): - c_perfPolicy_type = _nvmlPerfPolicyType_t(perfPolicyType) - c_violTime = c_nvmlViolationTime_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetViolationStatus") - - ## Invoke the method to get violation time - ret = fn(device, c_perfPolicy_type, byref(c_violTime)) - _nvmlCheckReturn(ret) - return c_violTime - -def nvmlDeviceGetPcieThroughput(device, counter): - c_util = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieThroughput") - ret = fn(device, _nvmlPcieUtilCounter_t(counter), byref(c_util)) - _nvmlCheckReturn(ret) - return c_util.value - -def nvmlSystemGetTopologyGpuSet(cpuNumber): - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlSystemGetTopologyGpuSet") - - # First call will get the size - ret = fn(cpuNumber, byref(c_count), None) - - if ret != NVML_SUCCESS: - raise NVMLError(ret) - print(c_count.value) - # call again with a buffer - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - ret = fn(cpuNumber, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return map(None, c_devices[0:c_count.value]) - -def nvmlDeviceGetTopologyNearestGpus(device, level): - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyNearestGpus") - - # First call will get the size - ret = fn(device, level, byref(c_count), None) - - if ret != NVML_SUCCESS: - raise NVMLError(ret) - - # call again with a buffer - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - ret = fn(device, level, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return map(None, c_devices[0:c_count.value]) - -def nvmlDeviceGetTopologyCommonAncestor(device1, device2): - c_level = _nvmlGpuTopologyLevel_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyCommonAncestor") - ret = fn(device1, device2, byref(c_level)) - _nvmlCheckReturn(ret) - return c_level.value - -#DeepFaceLab additions -def nvmlDeviceGetCudaComputeCapability(device): - c_major = c_int() - c_minor = c_int() - - try: - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability") - except: - return 9, 9 - - # get the count - ret = fn(device, byref(c_major), byref(c_minor)) - - # this should only fail with insufficient size - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - +##### +# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the NVIDIA Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +##### + +## +# Python bindings for the NVML library +## +from ctypes import * +from ctypes.util import find_library +import sys +import os +import threading +import string + +## C Type mappings ## +## Enums +_nvmlEnableState_t = c_uint +NVML_FEATURE_DISABLED = 0 +NVML_FEATURE_ENABLED = 1 + +_nvmlBrandType_t = c_uint +NVML_BRAND_UNKNOWN = 0 +NVML_BRAND_QUADRO = 1 +NVML_BRAND_TESLA = 2 +NVML_BRAND_NVS = 3 +NVML_BRAND_GRID = 4 +NVML_BRAND_GEFORCE = 5 +NVML_BRAND_COUNT = 6 + +_nvmlTemperatureThresholds_t = c_uint +NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0 +NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1 +NVML_TEMPERATURE_THRESHOLD_COUNT = 1 + +_nvmlTemperatureSensors_t = c_uint +NVML_TEMPERATURE_GPU = 0 +NVML_TEMPERATURE_COUNT = 1 + +_nvmlComputeMode_t = c_uint +NVML_COMPUTEMODE_DEFAULT = 0 +NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 +NVML_COMPUTEMODE_PROHIBITED = 2 +NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 +NVML_COMPUTEMODE_COUNT = 4 + +_nvmlMemoryLocation_t = c_uint +NVML_MEMORY_LOCATION_L1_CACHE = 0 +NVML_MEMORY_LOCATION_L2_CACHE = 1 +NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2 +NVML_MEMORY_LOCATION_REGISTER_FILE = 3 +NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4 +NVML_MEMORY_LOCATION_COUNT = 5 + +# These are deprecated, instead use _nvmlMemoryErrorType_t +_nvmlEccBitType_t = c_uint +NVML_SINGLE_BIT_ECC = 0 +NVML_DOUBLE_BIT_ECC = 1 +NVML_ECC_ERROR_TYPE_COUNT = 2 + +_nvmlEccCounterType_t = c_uint +NVML_VOLATILE_ECC = 0 +NVML_AGGREGATE_ECC = 1 +NVML_ECC_COUNTER_TYPE_COUNT = 2 + +_nvmlMemoryErrorType_t = c_uint +NVML_MEMORY_ERROR_TYPE_CORRECTED = 0 +NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1 +NVML_MEMORY_ERROR_TYPE_COUNT = 2 + +_nvmlClockType_t = c_uint +NVML_CLOCK_GRAPHICS = 0 +NVML_CLOCK_SM = 1 +NVML_CLOCK_MEM = 2 +NVML_CLOCK_COUNT = 3 + +_nvmlDriverModel_t = c_uint +NVML_DRIVER_WDDM = 0 +NVML_DRIVER_WDM = 1 + +_nvmlPstates_t = c_uint +NVML_PSTATE_0 = 0 +NVML_PSTATE_1 = 1 +NVML_PSTATE_2 = 2 +NVML_PSTATE_3 = 3 +NVML_PSTATE_4 = 4 +NVML_PSTATE_5 = 5 +NVML_PSTATE_6 = 6 +NVML_PSTATE_7 = 7 +NVML_PSTATE_8 = 8 +NVML_PSTATE_9 = 9 +NVML_PSTATE_10 = 10 +NVML_PSTATE_11 = 11 +NVML_PSTATE_12 = 12 +NVML_PSTATE_13 = 13 +NVML_PSTATE_14 = 14 +NVML_PSTATE_15 = 15 +NVML_PSTATE_UNKNOWN = 32 + +_nvmlInforomObject_t = c_uint +NVML_INFOROM_OEM = 0 +NVML_INFOROM_ECC = 1 +NVML_INFOROM_POWER = 2 +NVML_INFOROM_COUNT = 3 + +_nvmlReturn_t = c_uint +NVML_SUCCESS = 0 
+NVML_ERROR_UNINITIALIZED = 1 +NVML_ERROR_INVALID_ARGUMENT = 2 +NVML_ERROR_NOT_SUPPORTED = 3 +NVML_ERROR_NO_PERMISSION = 4 +NVML_ERROR_ALREADY_INITIALIZED = 5 +NVML_ERROR_NOT_FOUND = 6 +NVML_ERROR_INSUFFICIENT_SIZE = 7 +NVML_ERROR_INSUFFICIENT_POWER = 8 +NVML_ERROR_DRIVER_NOT_LOADED = 9 +NVML_ERROR_TIMEOUT = 10 +NVML_ERROR_IRQ_ISSUE = 11 +NVML_ERROR_LIBRARY_NOT_FOUND = 12 +NVML_ERROR_FUNCTION_NOT_FOUND = 13 +NVML_ERROR_CORRUPTED_INFOROM = 14 +NVML_ERROR_GPU_IS_LOST = 15 +NVML_ERROR_RESET_REQUIRED = 16 +NVML_ERROR_OPERATING_SYSTEM = 17 +NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18 +NVML_ERROR_UNKNOWN = 999 + +_nvmlFanState_t = c_uint +NVML_FAN_NORMAL = 0 +NVML_FAN_FAILED = 1 + +_nvmlLedColor_t = c_uint +NVML_LED_COLOR_GREEN = 0 +NVML_LED_COLOR_AMBER = 1 + +_nvmlGpuOperationMode_t = c_uint +NVML_GOM_ALL_ON = 0 +NVML_GOM_COMPUTE = 1 +NVML_GOM_LOW_DP = 2 + +_nvmlPageRetirementCause_t = c_uint +NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 0 +NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 1 +NVML_PAGE_RETIREMENT_CAUSE_COUNT = 2 + +_nvmlRestrictedAPI_t = c_uint +NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0 +NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1 +NVML_RESTRICTED_API_COUNT = 2 + +_nvmlBridgeChipType_t = c_uint +NVML_BRIDGE_CHIP_PLX = 0 +NVML_BRIDGE_CHIP_BRO4 = 1 +NVML_MAX_PHYSICAL_BRIDGE = 128 + +_nvmlValueType_t = c_uint +NVML_VALUE_TYPE_DOUBLE = 0 +NVML_VALUE_TYPE_UNSIGNED_INT = 1 +NVML_VALUE_TYPE_UNSIGNED_LONG = 2 +NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3 +NVML_VALUE_TYPE_COUNT = 4 + +_nvmlPerfPolicyType_t = c_uint +NVML_PERF_POLICY_POWER = 0 +NVML_PERF_POLICY_THERMAL = 1 +NVML_PERF_POLICY_COUNT = 2 + +_nvmlSamplingType_t = c_uint +NVML_TOTAL_POWER_SAMPLES = 0 +NVML_GPU_UTILIZATION_SAMPLES = 1 +NVML_MEMORY_UTILIZATION_SAMPLES = 2 +NVML_ENC_UTILIZATION_SAMPLES = 3 +NVML_DEC_UTILIZATION_SAMPLES = 4 +NVML_PROCESSOR_CLK_SAMPLES = 5 +NVML_MEMORY_CLK_SAMPLES = 6 +NVML_SAMPLINGTYPE_COUNT = 7 + +_nvmlPcieUtilCounter_t = c_uint +NVML_PCIE_UTIL_TX_BYTES = 0 +NVML_PCIE_UTIL_RX_BYTES = 1 +NVML_PCIE_UTIL_COUNT = 2 + +_nvmlGpuTopologyLevel_t = c_uint +NVML_TOPOLOGY_INTERNAL = 0 +NVML_TOPOLOGY_SINGLE = 10 +NVML_TOPOLOGY_MULTIPLE = 20 +NVML_TOPOLOGY_HOSTBRIDGE = 30 +NVML_TOPOLOGY_CPU = 40 +NVML_TOPOLOGY_SYSTEM = 50 + +# C preprocessor defined values +nvmlFlagDefault = 0 +nvmlFlagForce = 1 + +# buffer size +NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 +NVML_DEVICE_UUID_BUFFER_SIZE = 80 +NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 +NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 +NVML_DEVICE_NAME_BUFFER_SIZE = 64 +NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 +NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 +NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = 16 + +NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) +NVML_VALUE_NOT_AVAILABLE_uint = c_uint(-1) + +## Lib loading ## +nvmlLib = None +libLoadLock = threading.Lock() +_nvmlLib_refcount = 0 # Incremented on each nvmlInit and decremented on nvmlShutdown + +## Error Checking ## +class NVMLError(Exception): + _valClassMapping = dict() + # List of currently known error codes + _errcode_to_string = { + NVML_ERROR_UNINITIALIZED: "Uninitialized", + NVML_ERROR_INVALID_ARGUMENT: "Invalid Argument", + NVML_ERROR_NOT_SUPPORTED: "Not Supported", + NVML_ERROR_NO_PERMISSION: "Insufficient Permissions", + NVML_ERROR_ALREADY_INITIALIZED: "Already Initialized", + NVML_ERROR_NOT_FOUND: "Not Found", + NVML_ERROR_INSUFFICIENT_SIZE: "Insufficient Size", + NVML_ERROR_INSUFFICIENT_POWER: "Insufficient External Power", + NVML_ERROR_DRIVER_NOT_LOADED: "Driver Not Loaded", + 
NVML_ERROR_TIMEOUT: "Timeout", + NVML_ERROR_IRQ_ISSUE: "Interrupt Request Issue", + NVML_ERROR_LIBRARY_NOT_FOUND: "NVML Shared Library Not Found", + NVML_ERROR_FUNCTION_NOT_FOUND: "Function Not Found", + NVML_ERROR_CORRUPTED_INFOROM: "Corrupted infoROM", + NVML_ERROR_GPU_IS_LOST: "GPU is lost", + NVML_ERROR_RESET_REQUIRED: "GPU requires restart", + NVML_ERROR_OPERATING_SYSTEM: "The operating system has blocked the request.", + NVML_ERROR_LIB_RM_VERSION_MISMATCH: "RM has detected an NVML/RM version mismatch.", + NVML_ERROR_UNKNOWN: "Unknown Error", + } + def __new__(typ, value): + ''' + Maps value to a proper subclass of NVMLError. + See _extractNVMLErrorsAsClasses function for more details + ''' + if typ == NVMLError: + typ = NVMLError._valClassMapping.get(value, typ) + obj = Exception.__new__(typ) + obj.value = value + return obj + def __str__(self): + try: + if self.value not in NVMLError._errcode_to_string: + NVMLError._errcode_to_string[self.value] = str(nvmlErrorString(self.value)) + return NVMLError._errcode_to_string[self.value] + except NVMLError_Uninitialized: + return "NVML Error with code %d" % self.value + def __eq__(self, other): + return self.value == other.value + +def _extractNVMLErrorsAsClasses(): + ''' + Generates a hierarchy of classes on top of NVMLError class. + + Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate + exceptions more easily. + + NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass. + e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized + ''' + this_module = sys.modules[__name__] + nvmlErrorsNames = filter(lambda x: x.startswith("NVML_ERROR_"), dir(this_module)) + for err_name in nvmlErrorsNames: + # e.g. Turn NVML_ERROR_ALREADY_INITIALIZED into NVMLError_AlreadyInitialized + class_name = "NVMLError_" + string.capwords(err_name.replace("NVML_ERROR_", ""), "_").replace("_", "") + err_val = getattr(this_module, err_name) + def gen_new(val): + def new(typ): + obj = NVMLError.__new__(typ, val) + return obj + return new + new_error_class = type(class_name, (NVMLError,), {'__new__': gen_new(err_val)}) + new_error_class.__module__ = __name__ + setattr(this_module, class_name, new_error_class) + NVMLError._valClassMapping[err_val] = new_error_class +_extractNVMLErrorsAsClasses() + +def _nvmlCheckReturn(ret): + if (ret != NVML_SUCCESS): + raise NVMLError(ret) + return ret + +## Function access ## +_nvmlGetFunctionPointer_cache = dict() # function pointers are cached to prevent unnecessary libLoadLock locking +def _nvmlGetFunctionPointer(name): + global nvmlLib + + if name in _nvmlGetFunctionPointer_cache: + return _nvmlGetFunctionPointer_cache[name] + + libLoadLock.acquire() + try: + # ensure library was loaded + if (nvmlLib == None): + raise NVMLError(NVML_ERROR_UNINITIALIZED) + try: + _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name) + return _nvmlGetFunctionPointer_cache[name] + except AttributeError: + raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +## Alternative object +# Allows the object to be printed +# Allows mismatched types to be assigned +# - like None when the Structure variant requires c_uint +class nvmlFriendlyObject(object): + def __init__(self, dictionary): + for x in dictionary: + setattr(self, x, dictionary[x]) + def __str__(self): + return self.__dict__.__str__() + +def nvmlStructToFriendlyObject(struct): + d = {} + for x in struct._fields_: + key = x[0] + value = 
getattr(struct, key) + d[key] = value + obj = nvmlFriendlyObject(d) + return obj + +# pack the object so it can be passed to the NVML library +def nvmlFriendlyObjectToStruct(obj, model): + for x in model._fields_: + key = x[0] + value = obj.__dict__[key] + setattr(model, key, value) + return model + +## Unit structures +class struct_c_nvmlUnit_t(Structure): + pass # opaque handle +c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) + +class _PrintableStructure(Structure): + """ + Abstract class that produces nicer __str__ output than ctypes.Structure. + e.g. instead of: + >>> print str(obj) + + this class will print + class_name(field_name: formatted_value, field_name: formatted_value) + + _fmt_ dictionary of -> + e.g. class that has _field_ 'hex_value', c_uint could be formatted with + _fmt_ = {"hex_value" : "%08X"} + to produce nicer output. + Default fomratting string for all fields can be set with key "" like: + _fmt_ = {"" : "%d MHz"} # e.g all values are numbers in MHz. + If not set it's assumed to be just "%s" + + Exact format of returned str from this class is subject to change in the future. + """ + _fmt_ = {} + def __str__(self): + result = [] + for x in self._fields_: + key = x[0] + value = getattr(self, key) + fmt = "%s" + if key in self._fmt_: + fmt = self._fmt_[key] + elif "" in self._fmt_: + fmt = self._fmt_[""] + result.append(("%s: " + fmt) % (key, value)) + return self.__class__.__name__ + "(" + string.join(result, ", ") + ")" + +class c_nvmlUnitInfo_t(_PrintableStructure): + _fields_ = [ + ('name', c_char * 96), + ('id', c_char * 96), + ('serial', c_char * 96), + ('firmwareVersion', c_char * 96), + ] + +class c_nvmlLedState_t(_PrintableStructure): + _fields_ = [ + ('cause', c_char * 256), + ('color', _nvmlLedColor_t), + ] + +class c_nvmlPSUInfo_t(_PrintableStructure): + _fields_ = [ + ('state', c_char * 256), + ('current', c_uint), + ('voltage', c_uint), + ('power', c_uint), + ] + +class c_nvmlUnitFanInfo_t(_PrintableStructure): + _fields_ = [ + ('speed', c_uint), + ('state', _nvmlFanState_t), + ] + +class c_nvmlUnitFanSpeeds_t(_PrintableStructure): + _fields_ = [ + ('fans', c_nvmlUnitFanInfo_t * 24), + ('count', c_uint) + ] + +## Device structures +class struct_c_nvmlDevice_t(Structure): + pass # opaque handle +c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) + +class nvmlPciInfo_t(_PrintableStructure): + _fields_ = [ + ('busId', c_char * 16), + ('domain', c_uint), + ('bus', c_uint), + ('device', c_uint), + ('pciDeviceId', c_uint), + + # Added in 2.285 + ('pciSubSystemId', c_uint), + ('reserved0', c_uint), + ('reserved1', c_uint), + ('reserved2', c_uint), + ('reserved3', c_uint), + ] + _fmt_ = { + 'domain' : "0x%04X", + 'bus' : "0x%02X", + 'device' : "0x%02X", + 'pciDeviceId' : "0x%08X", + 'pciSubSystemId' : "0x%08X", + } + +class c_nvmlMemory_t(_PrintableStructure): + _fields_ = [ + ('total', c_ulonglong), + ('free', c_ulonglong), + ('used', c_ulonglong), + ] + _fmt_ = {'': "%d B"} + +class c_nvmlBAR1Memory_t(_PrintableStructure): + _fields_ = [ + ('bar1Total', c_ulonglong), + ('bar1Free', c_ulonglong), + ('bar1Used', c_ulonglong), + ] + _fmt_ = {'': "%d B"} + +# On Windows with the WDDM driver, usedGpuMemory is reported as None +# Code that processes this structure should check for None, I.E. 
+# +# if (info.usedGpuMemory == None): +# # TODO handle the error +# pass +# else: +# print("Using %d MiB of memory" % (info.usedGpuMemory / 1024 / 1024)) +# +# See NVML documentation for more information +class c_nvmlProcessInfo_t(_PrintableStructure): + _fields_ = [ + ('pid', c_uint), + ('usedGpuMemory', c_ulonglong), + ] + _fmt_ = {'usedGpuMemory': "%d B"} + +class c_nvmlBridgeChipInfo_t(_PrintableStructure): + _fields_ = [ + ('type', _nvmlBridgeChipType_t), + ('fwVersion', c_uint), + ] + +class c_nvmlBridgeChipHierarchy_t(_PrintableStructure): + _fields_ = [ + ('bridgeCount', c_uint), + ('bridgeChipInfo', c_nvmlBridgeChipInfo_t * 128), + ] + +class c_nvmlEccErrorCounts_t(_PrintableStructure): + _fields_ = [ + ('l1Cache', c_ulonglong), + ('l2Cache', c_ulonglong), + ('deviceMemory', c_ulonglong), + ('registerFile', c_ulonglong), + ] + +class c_nvmlUtilization_t(_PrintableStructure): + _fields_ = [ + ('gpu', c_uint), + ('memory', c_uint), + ] + _fmt_ = {'': "%d %%"} + +# Added in 2.285 +class c_nvmlHwbcEntry_t(_PrintableStructure): + _fields_ = [ + ('hwbcId', c_uint), + ('firmwareVersion', c_char * 32), + ] + +class c_nvmlValue_t(Union): + _fields_ = [ + ('dVal', c_double), + ('uiVal', c_uint), + ('ulVal', c_ulong), + ('ullVal', c_ulonglong), + ] + +class c_nvmlSample_t(_PrintableStructure): + _fields_ = [ + ('timeStamp', c_ulonglong), + ('sampleValue', c_nvmlValue_t), + ] + +class c_nvmlViolationTime_t(_PrintableStructure): + _fields_ = [ + ('referenceTime', c_ulonglong), + ('violationTime', c_ulonglong), + ] + +## Event structures +class struct_c_nvmlEventSet_t(Structure): + pass # opaque handle +c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) + +nvmlEventTypeSingleBitEccError = 0x0000000000000001 +nvmlEventTypeDoubleBitEccError = 0x0000000000000002 +nvmlEventTypePState = 0x0000000000000004 +nvmlEventTypeXidCriticalError = 0x0000000000000008 +nvmlEventTypeClock = 0x0000000000000010 +nvmlEventTypeNone = 0x0000000000000000 +nvmlEventTypeAll = ( + nvmlEventTypeNone | + nvmlEventTypeSingleBitEccError | + nvmlEventTypeDoubleBitEccError | + nvmlEventTypePState | + nvmlEventTypeClock | + nvmlEventTypeXidCriticalError + ) + +## Clock Throttle Reasons defines +nvmlClocksThrottleReasonGpuIdle = 0x0000000000000001 +nvmlClocksThrottleReasonApplicationsClocksSetting = 0x0000000000000002 +nvmlClocksThrottleReasonUserDefinedClocks = nvmlClocksThrottleReasonApplicationsClocksSetting # deprecated, use nvmlClocksThrottleReasonApplicationsClocksSetting +nvmlClocksThrottleReasonSwPowerCap = 0x0000000000000004 +nvmlClocksThrottleReasonHwSlowdown = 0x0000000000000008 +nvmlClocksThrottleReasonUnknown = 0x8000000000000000 +nvmlClocksThrottleReasonNone = 0x0000000000000000 +nvmlClocksThrottleReasonAll = ( + nvmlClocksThrottleReasonNone | + nvmlClocksThrottleReasonGpuIdle | + nvmlClocksThrottleReasonApplicationsClocksSetting | + nvmlClocksThrottleReasonSwPowerCap | + nvmlClocksThrottleReasonHwSlowdown | + nvmlClocksThrottleReasonUnknown + ) + +class c_nvmlEventData_t(_PrintableStructure): + _fields_ = [ + ('device', c_nvmlDevice_t), + ('eventType', c_ulonglong), + ('eventData', c_ulonglong) + ] + _fmt_ = {'eventType': "0x%08X"} + +class c_nvmlAccountingStats_t(_PrintableStructure): + _fields_ = [ + ('gpuUtilization', c_uint), + ('memoryUtilization', c_uint), + ('maxMemoryUsage', c_ulonglong), + ('time', c_ulonglong), + ('startTime', c_ulonglong), + ('isRunning', c_uint), + ('reserved', c_uint * 5) + ] + +## C function wrappers ## +def nvmlInit(): + _LoadNvmlLibrary() + + # + # Initialize the library + # + fn 
= _nvmlGetFunctionPointer("nvmlInit_v2") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + _nvmlLib_refcount += 1 + libLoadLock.release() + return None + +def _LoadNvmlLibrary(): + ''' + Load the library if it isn't loaded already + ''' + global nvmlLib + + if (nvmlLib == None): + # lock to ensure only one caller loads the library + libLoadLock.acquire() + + try: + # ensure the library still isn't loaded + if (nvmlLib == None): + try: + if (sys.platform[:3] == "win"): + searchPaths = [ + os.path.join(os.getenv("ProgramFiles", r"C:\Program Files"), r"NVIDIA Corporation\NVSMI\nvml.dll"), + os.path.join(os.getenv("WinDir", r"C:\Windows"), r"System32\nvml.dll"), + ] + nvmlPath = next((x for x in searchPaths if os.path.isfile(x)), None) + if (nvmlPath == None): + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + else: + # cdecl calling convention + nvmlLib = CDLL(nvmlPath) + else: + # assume linux + nvmlLib = CDLL("libnvidia-ml.so.1") + except OSError as ose: + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + if (nvmlLib == None): + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +def nvmlShutdown(): + # + # Leave the library loaded, but shutdown the interface + # + fn = _nvmlGetFunctionPointer("nvmlShutdown") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + if (0 < _nvmlLib_refcount): + _nvmlLib_refcount -= 1 + libLoadLock.release() + return None + +# Added in 2.285 +def nvmlErrorString(result): + fn = _nvmlGetFunctionPointer("nvmlErrorString") + fn.restype = c_char_p # otherwise return is an int + ret = fn(result) + return ret + +# Added in 2.285 +def nvmlSystemGetNVMLVersion(): + c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetProcessName(pid): + c_name = create_string_buffer(1024) + fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") + ret = fn(c_uint(pid), c_name, c_uint(1024)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlSystemGetDriverVersion(): + c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetHicVersion(): + c_count = c_uint(0) + hics = None + fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") + + # get the count + ret = fn(byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # if there are no hics + if (c_count.value == 0): + return [] + + hic_array = c_nvmlHwbcEntry_t * c_count.value + hics = hic_array() + ret = fn(byref(c_count), hics) + _nvmlCheckReturn(ret) + return hics + +## Unit get functions +def nvmlUnitGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetHandleByIndex(index): + c_index = c_uint(index) + unit = c_nvmlUnit_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") + ret = fn(c_index, byref(unit)) + _nvmlCheckReturn(ret) + 
return unit + +def nvmlUnitGetUnitInfo(unit): + c_info = c_nvmlUnitInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetLedState(unit): + c_state = c_nvmlLedState_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") + ret = fn(unit, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state + +def nvmlUnitGetPsuInfo(unit): + c_info = c_nvmlPSUInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetTemperature(unit, type): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") + ret = fn(unit, c_uint(type), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlUnitGetFanSpeedInfo(unit): + c_speeds = c_nvmlUnitFanSpeeds_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") + ret = fn(unit, byref(c_speeds)) + _nvmlCheckReturn(ret) + return c_speeds + +# added to API +def nvmlUnitGetDeviceCount(unit): + c_count = c_uint(0) + # query the unit to determine device count + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), None) + if (ret == NVML_ERROR_INSUFFICIENT_SIZE): + ret = NVML_SUCCESS + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetDevices(unit): + c_count = c_uint(nvmlUnitGetDeviceCount(unit)) + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return c_devices + +## Device get functions +def nvmlDeviceGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount_v2") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetHandleByIndex(index): + c_index = c_uint(index) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex_v2") + ret = fn(c_index, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleBySerial(serial): + c_serial = c_char_p(serial) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") + ret = fn(c_serial, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByUUID(uuid): + c_uuid = c_char_p(uuid) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") + ret = fn(c_uuid, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByPciBusId(pciBusId): + c_busId = c_char_p(pciBusId) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId_v2") + ret = fn(c_busId, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetName(handle): + c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") + ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlDeviceGetBoardId(handle): + c_id = c_uint(); + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBoardId") + ret = fn(handle, byref(c_id)) + _nvmlCheckReturn(ret) + return c_id.value + +def nvmlDeviceGetMultiGpuBoard(handle): + c_multiGpu = c_uint(); + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMultiGpuBoard") + ret = fn(handle, byref(c_multiGpu)) + _nvmlCheckReturn(ret) + return c_multiGpu.value + +def nvmlDeviceGetBrand(handle): + c_type = _nvmlBrandType_t() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetBrand") + ret = fn(handle, byref(c_type)) + _nvmlCheckReturn(ret) + return c_type.value + +def nvmlDeviceGetSerial(handle): + c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") + ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_serial.value + +def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): + affinity_array = c_ulonglong * cpuSetSize + c_affinity = affinity_array() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCpuAffinity") + ret = fn(handle, cpuSetSize, byref(c_affinity)) + _nvmlCheckReturn(ret) + return c_affinity + +def nvmlDeviceSetCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetMinorNumber(handle): + c_minor_number = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinorNumber") + ret = fn(handle, byref(c_minor_number)) + _nvmlCheckReturn(ret) + return c_minor_number.value + +def nvmlDeviceGetUUID(handle): + c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") + ret = fn(handle, c_uuid, c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_uuid.value + +def nvmlDeviceGetInforomVersion(handle, infoRomObject): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") + ret = fn(handle, _nvmlInforomObject_t(infoRomObject), + c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomImageVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomImageVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomConfigurationChecksum(handle): + c_checksum = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomConfigurationChecksum") + ret = fn(handle, byref(c_checksum)) + _nvmlCheckReturn(ret) + return c_checksum.value + +# Added in 4.304 +def nvmlDeviceValidateInforom(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceValidateInforom") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetDisplayMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetDisplayActive(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayActive") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + + +def nvmlDeviceGetPersistenceMode(handle): + c_state = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") + ret = fn(handle, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state.value + +def nvmlDeviceGetPciInfo(handle): + c_info = nvmlPciInfo_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") + ret = fn(handle, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlDeviceGetClockInfo(handle, type): + c_clock = c_uint() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 2.285 +def nvmlDeviceGetMaxClockInfo(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 5.319 +def nvmlDeviceGetDefaultApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDefaultApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetSupportedMemoryClocks(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedMemoryClocks") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +# Added in 4.304 +def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedGraphicsClocks") + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetFanSpeed(handle): + c_speed = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") + ret = fn(handle, byref(c_speed)) + _nvmlCheckReturn(ret) + return c_speed.value + +def nvmlDeviceGetTemperature(handle, sensor): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") + ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlDeviceGetTemperatureThreshold(handle, threshold): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperatureThreshold") + ret = fn(handle, _nvmlTemperatureThresholds_t(threshold), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +# DEPRECATED use nvmlDeviceGetPerformanceState +def nvmlDeviceGetPowerState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def nvmlDeviceGetPerformanceState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def 
nvmlDeviceGetPowerManagementMode(handle): + c_pcapMode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") + ret = fn(handle, byref(c_pcapMode)) + _nvmlCheckReturn(ret) + return c_pcapMode.value + +def nvmlDeviceGetPowerManagementLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +# Added in 4.304 +def nvmlDeviceGetPowerManagementLimitConstraints(handle): + c_minLimit = c_uint() + c_maxLimit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimitConstraints") + ret = fn(handle, byref(c_minLimit), byref(c_maxLimit)) + _nvmlCheckReturn(ret) + return [c_minLimit.value, c_maxLimit.value] + +# Added in 4.304 +def nvmlDeviceGetPowerManagementDefaultLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementDefaultLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + + +# Added in 331 +def nvmlDeviceGetEnforcedPowerLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEnforcedPowerLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +def nvmlDeviceGetPowerUsage(handle): + c_watts = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") + ret = fn(handle, byref(c_watts)) + _nvmlCheckReturn(ret) + return c_watts.value + +# Added in 4.304 +def nvmlDeviceGetGpuOperationMode(handle): + c_currState = _nvmlGpuOperationMode_t() + c_pendingState = _nvmlGpuOperationMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGpuOperationMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# Added in 4.304 +def nvmlDeviceGetCurrentGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[0] + +# Added in 4.304 +def nvmlDeviceGetPendingGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[1] + +def nvmlDeviceGetMemoryInfo(handle): + c_memory = c_nvmlMemory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") + ret = fn(handle, byref(c_memory)) + _nvmlCheckReturn(ret) + return c_memory + +def nvmlDeviceGetBAR1MemoryInfo(handle): + c_bar1_memory = c_nvmlBAR1Memory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBAR1MemoryInfo") + ret = fn(handle, byref(c_bar1_memory)) + _nvmlCheckReturn(ret) + return c_bar1_memory + +def nvmlDeviceGetComputeMode(handle): + c_mode = _nvmlComputeMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetEccMode(handle): + c_currState = _nvmlEnableState_t() + c_pendingState = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# added to API +def nvmlDeviceGetCurrentEccMode(handle): + return nvmlDeviceGetEccMode(handle)[0] + +# added to API +def nvmlDeviceGetPendingEccMode(handle): + return nvmlDeviceGetEccMode(handle)[1] + +def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +# This is 
deprecated, instead use nvmlDeviceGetMemoryErrorCounter +def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): + c_counts = c_nvmlEccErrorCounts_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_counts)) + _nvmlCheckReturn(ret) + return c_counts + +# Added in 4.304 +def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, locationType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryErrorCounter") + ret = fn(handle, + _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), + _nvmlMemoryLocation_t(locationType), + byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetUtilizationRates(handle): + c_util = c_nvmlUtilization_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") + ret = fn(handle, byref(c_util)) + _nvmlCheckReturn(ret) + return c_util + +def nvmlDeviceGetEncoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEncoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetDecoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDecoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetPcieReplayCounter(handle): + c_replay = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieReplayCounter") + ret = fn(handle, byref(c_replay)) + _nvmlCheckReturn(ret) + return c_replay.value + +def nvmlDeviceGetDriverModel(handle): + c_currModel = _nvmlDriverModel_t() + c_pendingModel = _nvmlDriverModel_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") + ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) + _nvmlCheckReturn(ret) + return [c_currModel.value, c_pendingModel.value] + +# added to API +def nvmlDeviceGetCurrentDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[0] + +# added to API +def nvmlDeviceGetPendingDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[1] + +# Added in 2.285 +def nvmlDeviceGetVbiosVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlDeviceGetComputeRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array incase more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see 
comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetGraphicsRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array incase more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetAutoBoostedClocksEnabled(handle): + c_isEnabled = _nvmlEnableState_t() + c_defaultIsEnabled = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAutoBoostedClocksEnabled") + ret = fn(handle, byref(c_isEnabled), byref(c_defaultIsEnabled)) + _nvmlCheckReturn(ret) + return [c_isEnabled.value, c_defaultIsEnabled.value] + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +## Set functions +def nvmlUnitSetLedState(unit, color): + fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") + ret = fn(unit, _nvmlLedColor_t(color)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetPersistenceMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetComputeMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") + ret = fn(handle, _nvmlComputeMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetEccMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearEccErrorCounts(handle, counterType): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") + ret = fn(handle, _nvmlEccCounterType_t(counterType)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetDriverModel(handle, model): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") + ret = fn(handle, _nvmlDriverModel_t(model)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDefaultAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled), c_uint(flags)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +# Added in 4.304 +def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsClockMHz): + fn = 
_nvmlGetFunctionPointer("nvmlDeviceSetApplicationsClocks") + ret = fn(handle, c_uint(maxMemClockMHz), c_uint(maxGraphicsClockMHz)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceResetApplicationsClocks(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceResetApplicationsClocks") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetPowerManagementLimit(handle, limit): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPowerManagementLimit") + ret = fn(handle, c_uint(limit)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetGpuOperationMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetGpuOperationMode") + ret = fn(handle, _nvmlGpuOperationMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlEventSetCreate(): + fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") + eventSet = c_nvmlEventSet_t() + ret = fn(byref(eventSet)) + _nvmlCheckReturn(ret) + return eventSet + +# Added in 2.285 +def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): + fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") + ret = fn(handle, c_ulonglong(eventTypes), eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlDeviceGetSupportedEventTypes(handle): + c_eventTypes = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") + ret = fn(handle, byref(c_eventTypes)) + _nvmlCheckReturn(ret) + return c_eventTypes.value + +# Added in 2.285 +# raises NVML_ERROR_TIMEOUT exception on timeout +def nvmlEventSetWait(eventSet, timeoutms): + fn = _nvmlGetFunctionPointer("nvmlEventSetWait") + data = c_nvmlEventData_t() + ret = fn(eventSet, byref(data), c_uint(timeoutms)) + _nvmlCheckReturn(ret) + return data + +# Added in 2.285 +def nvmlEventSetFree(eventSet): + fn = _nvmlGetFunctionPointer("nvmlEventSetFree") + ret = fn(eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 3.295 +def nvmlDeviceOnSameBoard(handle1, handle2): + fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") + onSameBoard = c_int() + ret = fn(handle1, handle2, byref(onSameBoard)) + _nvmlCheckReturn(ret) + return (onSameBoard.value != 0) + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 4.304 +def nvmlDeviceGetSupportedClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons") + ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 4.304 +def nvmlDeviceGetCurrentClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons") + 
ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 5.319 +def nvmlDeviceGetIndex(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetIndex") + c_index = c_uint() + ret = fn(handle, byref(c_index)) + _nvmlCheckReturn(ret) + return c_index.value + +# Added in 5.319 +def nvmlDeviceGetAccountingMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceSetAccountingMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAccountingMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearAccountingPids(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearAccountingPids") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetAccountingStats(handle, pid): + stats = c_nvmlAccountingStats_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingStats") + ret = fn(handle, c_uint(pid), byref(stats)) + _nvmlCheckReturn(ret) + if (stats.maxMemoryUsage == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + stats.maxMemoryUsage = None + return stats + +def nvmlDeviceGetAccountingPids(handle): + count = c_uint(nvmlDeviceGetAccountingBufferSize(handle)) + pids = (c_uint * count.value)() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingPids") + ret = fn(handle, byref(count), pids) + _nvmlCheckReturn(ret) + return map(int, pids[0:count.value]) + +def nvmlDeviceGetAccountingBufferSize(handle): + bufferSize = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingBufferSize") + ret = fn(handle, byref(bufferSize)) + _nvmlCheckReturn(ret) + return int(bufferSize.value) + +def nvmlDeviceGetRetiredPages(device, sourceFilter): + c_source = _nvmlPageRetirementCause_t(sourceFilter) + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages") + + # First call will get the size + ret = fn(device, c_source, byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # call again with a buffer + # oversize the array for the rare cases where additional pages + # are retired between NVML calls + c_count.value = c_count.value * 2 + 5 + page_array = c_ulonglong * c_count.value + c_pages = page_array() + ret = fn(device, c_source, byref(c_count), c_pages) + _nvmlCheckReturn(ret) + return map(int, c_pages[0:c_count.value]) + +def nvmlDeviceGetRetiredPagesPendingStatus(device): + c_pending = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPagesPendingStatus") + ret = fn(device, byref(c_pending)) + _nvmlCheckReturn(ret) + return int(c_pending.value) + +def nvmlDeviceGetAPIRestriction(device, apiType): + c_permission = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAPIRestriction") + ret = fn(device, _nvmlRestrictedAPI_t(apiType), byref(c_permission)) + _nvmlCheckReturn(ret) + return int(c_permission.value) + +def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAPIRestriction") + ret = fn(handle, _nvmlRestrictedAPI_t(apiType), _nvmlEnableState_t(isRestricted)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetBridgeChipInfo(handle): + bridgeHierarchy = c_nvmlBridgeChipHierarchy_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBridgeChipInfo") + 
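+    # bridgeHierarchy is a c_nvmlBridgeChipHierarchy_t (declared with the
+    # other ctypes structures in this module); after the call below, its
+    # bridgeCount field gives the number of valid entries in bridgeChipInfo.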
ret = fn(handle, byref(bridgeHierarchy))
+    _nvmlCheckReturn(ret)
+    return bridgeHierarchy
+
+def nvmlDeviceGetSamples(device, sampling_type, timeStamp):
+    c_sampling_type = _nvmlSamplingType_t(sampling_type)
+    c_time_stamp = c_ulonglong(timeStamp)
+    c_sample_count = c_uint(0)
+    c_sample_value_type = _nvmlValueType_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetSamples")
+
+    ## First call gets the size
+    ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), None)
+
+    # Stop if this fails
+    if (ret != NVML_SUCCESS):
+        raise NVMLError(ret)
+
+    sampleArray = c_sample_count.value * c_nvmlSample_t
+    c_samples = sampleArray()
+    ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), c_samples)
+    _nvmlCheckReturn(ret)
+    return (c_sample_value_type.value, c_samples[0:c_sample_count.value])
+
+def nvmlDeviceGetViolationStatus(device, perfPolicyType):
+    c_perfPolicy_type = _nvmlPerfPolicyType_t(perfPolicyType)
+    c_violTime = c_nvmlViolationTime_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetViolationStatus")
+
+    ## Invoke the method to get violation time
+    ret = fn(device, c_perfPolicy_type, byref(c_violTime))
+    _nvmlCheckReturn(ret)
+    return c_violTime
+
+def nvmlDeviceGetPcieThroughput(device, counter):
+    c_util = c_uint()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieThroughput")
+    ret = fn(device, _nvmlPcieUtilCounter_t(counter), byref(c_util))
+    _nvmlCheckReturn(ret)
+    return c_util.value
+
+def nvmlSystemGetTopologyGpuSet(cpuNumber):
+    c_count = c_uint(0)
+    fn = _nvmlGetFunctionPointer("nvmlSystemGetTopologyGpuSet")
+
+    # First call will get the size
+    ret = fn(cpuNumber, byref(c_count), None)
+
+    if ret != NVML_SUCCESS:
+        raise NVMLError(ret)
+
+    # call again with a buffer
+    device_array = c_nvmlDevice_t * c_count.value
+    c_devices = device_array()
+    ret = fn(cpuNumber, byref(c_count), c_devices)
+    _nvmlCheckReturn(ret)
+    return list(c_devices[0:c_count.value])
+
+def nvmlDeviceGetTopologyNearestGpus(device, level):
+    c_count = c_uint(0)
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyNearestGpus")
+
+    # First call will get the size
+    ret = fn(device, level, byref(c_count), None)
+
+    if ret != NVML_SUCCESS:
+        raise NVMLError(ret)
+
+    # call again with a buffer
+    device_array = c_nvmlDevice_t * c_count.value
+    c_devices = device_array()
+    ret = fn(device, level, byref(c_count), c_devices)
+    _nvmlCheckReturn(ret)
+    return list(c_devices[0:c_count.value])
+
+def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
+    c_level = _nvmlGpuTopologyLevel_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyCommonAncestor")
+    ret = fn(device1, device2, byref(c_level))
+    _nvmlCheckReturn(ret)
+    return c_level.value
+
+#DeepFaceLab additions
+def nvmlDeviceGetCudaComputeCapability(device):
+    c_major = c_int()
+    c_minor = c_int()
+
+    try:
+        fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")
+    except:
+        # entry point missing in this NVML build; report a permissive value
+        return 9, 9
+
+    # query the compute capability
+    ret = fn(device, byref(c_major), byref(c_minor))
+
+    if (ret != NVML_SUCCESS):
+        raise NVMLError(ret)
+    return c_major.value, c_minor.value
\ No newline at end of file
diff --git a/requirements-colab.txt b/requirements-colab.txt
index 7c69191..5edac84 100644
--- a/requirements-colab.txt
+++ b/requirements-colab.txt
@@ -1,10 +1,10 @@
-numpy==1.16.3
-h5py==2.9.0
-Keras==2.2.4
-opencv-python==4.0.0.21
-tensorflow-gpu==1.13.1
-plaidml-keras==0.5.0
-scikit-image
-tqdm
-ffmpeg-python==0.1.17 +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow-gpu==1.13.1 +plaidml-keras==0.5.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 git+https://www.github.com/keras-team/keras-contrib.git \ No newline at end of file diff --git a/requirements-cpu.txt b/requirements-cpu.txt index 6f5f27f..dcd38c5 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -1,9 +1,9 @@ -numpy==1.16.3 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.0.0.21 -tensorflow==1.12.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow==1.12.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 +git+https://www.github.com/keras-team/keras-contrib.git diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 5e50c09..06b8d42 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -1,11 +1,11 @@ -numpy==1.16.3 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.0.0.21 -tensorflow-gpu==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow-gpu==1.12.0 +plaidml==0.6.0 +plaidml-keras==0.5.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 +git+https://www.github.com/keras-team/keras-contrib.git diff --git a/requirements-opencl.txt b/requirements-opencl.txt index 99a13c1..12bc049 100644 --- a/requirements-opencl.txt +++ b/requirements-opencl.txt @@ -1,11 +1,11 @@ -numpy==1.16.3 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.0.0.21 -tensorflow==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow==1.12.0 +plaidml==0.6.0 +plaidml-keras==0.5.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 +git+https://www.github.com/keras-team/keras-contrib.git diff --git a/samplelib/Sample.py b/samplelib/Sample.py index d22a869..cde8b62 100644 --- a/samplelib/Sample.py +++ b/samplelib/Sample.py @@ -1,74 +1,74 @@ -from enum import IntEnum -from pathlib import Path - -import cv2 -import numpy as np - -from utils.cv2_utils import * -from utils.DFLJPG import DFLJPG -from utils.DFLPNG import DFLPNG - - -class SampleType(IntEnum): - IMAGE = 0 #raw image - - FACE_BEGIN = 1 - FACE = 1 #aligned face unsorted - FACE_YAW_SORTED = 2 #sorted by yaw - FACE_YAW_SORTED_AS_TARGET = 3 #sorted by yaw and included only yaws which exist in TARGET also automatic mirrored - FACE_TEMPORAL_SORTED = 4 - FACE_END = 4 - - QTY = 5 - -class Sample(object): - def __init__(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, mirror=None, close_target_list=None, fanseg_mask_exist=False): - self.sample_type = sample_type if sample_type is not None else SampleType.IMAGE - self.filename = filename - self.face_type = face_type - self.shape = shape - self.landmarks = np.array(landmarks) if landmarks is not None else None - self.ie_polys = ie_polys - self.pitch_yaw_roll = pitch_yaw_roll - self.source_filename = source_filename - self.mirror = mirror - self.close_target_list = close_target_list - self.fanseg_mask_exist = fanseg_mask_exist - - def copy_and_set(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, 
mirror=None, close_target_list=None, fanseg_mask=None, fanseg_mask_exist=None): - return Sample( - sample_type=sample_type if sample_type is not None else self.sample_type, - filename=filename if filename is not None else self.filename, - face_type=face_type if face_type is not None else self.face_type, - shape=shape if shape is not None else self.shape, - landmarks=landmarks if landmarks is not None else self.landmarks.copy(), - ie_polys=ie_polys if ie_polys is not None else self.ie_polys, - pitch_yaw_roll=pitch_yaw_roll if pitch_yaw_roll is not None else self.pitch_yaw_roll, - source_filename=source_filename if source_filename is not None else self.source_filename, - mirror=mirror if mirror is not None else self.mirror, - close_target_list=close_target_list if close_target_list is not None else self.close_target_list, - fanseg_mask_exist=fanseg_mask_exist if fanseg_mask_exist is not None else self.fanseg_mask_exist) - - def load_bgr(self): - img = cv2_imread (self.filename).astype(np.float32) / 255.0 - if self.mirror: - img = img[:,::-1].copy() - return img - - def load_fanseg_mask(self): - if self.fanseg_mask_exist: - filepath = Path(self.filename) - if filepath.suffix == '.png': - dflimg = DFLPNG.load ( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - return dflimg.get_fanseg_mask() - - return None - - def get_random_close_target_sample(self): - if self.close_target_list is None: - return None - return self.close_target_list[randint (0, len(self.close_target_list)-1)] +from enum import IntEnum +from pathlib import Path + +import cv2 +import numpy as np + +from utils.cv2_utils import * +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG + + +class SampleType(IntEnum): + IMAGE = 0 #raw image + + FACE_BEGIN = 1 + FACE = 1 #aligned face unsorted + FACE_YAW_SORTED = 2 #sorted by yaw + FACE_YAW_SORTED_AS_TARGET = 3 #sorted by yaw and included only yaws which exist in TARGET also automatic mirrored + FACE_TEMPORAL_SORTED = 4 + FACE_END = 4 + + QTY = 5 + +class Sample(object): + def __init__(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, mirror=None, close_target_list=None, fanseg_mask_exist=False): + self.sample_type = sample_type if sample_type is not None else SampleType.IMAGE + self.filename = filename + self.face_type = face_type + self.shape = shape + self.landmarks = np.array(landmarks) if landmarks is not None else None + self.ie_polys = ie_polys + self.pitch_yaw_roll = pitch_yaw_roll + self.source_filename = source_filename + self.mirror = mirror + self.close_target_list = close_target_list + self.fanseg_mask_exist = fanseg_mask_exist + + def copy_and_set(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, mirror=None, close_target_list=None, fanseg_mask=None, fanseg_mask_exist=None): + return Sample( + sample_type=sample_type if sample_type is not None else self.sample_type, + filename=filename if filename is not None else self.filename, + face_type=face_type if face_type is not None else self.face_type, + shape=shape if shape is not None else self.shape, + landmarks=landmarks if landmarks is not None else self.landmarks.copy(), + ie_polys=ie_polys if ie_polys is not None else self.ie_polys, + pitch_yaw_roll=pitch_yaw_roll if pitch_yaw_roll is not None else self.pitch_yaw_roll, + source_filename=source_filename if 
source_filename is not None else self.source_filename, + mirror=mirror if mirror is not None else self.mirror, + close_target_list=close_target_list if close_target_list is not None else self.close_target_list, + fanseg_mask_exist=fanseg_mask_exist if fanseg_mask_exist is not None else self.fanseg_mask_exist) + + def load_bgr(self): + img = cv2_imread (self.filename).astype(np.float32) / 255.0 + if self.mirror: + img = img[:,::-1].copy() + return img + + def load_fanseg_mask(self): + if self.fanseg_mask_exist: + filepath = Path(self.filename) + if filepath.suffix == '.png': + dflimg = DFLPNG.load ( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + return dflimg.get_fanseg_mask() + + return None + + def get_random_close_target_sample(self): + if self.close_target_list is None: + return None + return self.close_target_list[randint (0, len(self.close_target_list)-1)] diff --git a/samplelib/SampleGeneratorBase.py b/samplelib/SampleGeneratorBase.py index dec741e..42e9930 100644 --- a/samplelib/SampleGeneratorBase.py +++ b/samplelib/SampleGeneratorBase.py @@ -1,24 +1,24 @@ -from pathlib import Path - -''' -You can implement your own SampleGenerator -''' -class SampleGeneratorBase(object): - - - def __init__ (self, samples_path, debug, batch_size): - if samples_path is None: - raise Exception('samples_path is None') - - self.samples_path = Path(samples_path) - self.debug = debug - self.batch_size = 1 if self.debug else batch_size - - #overridable - def __iter__(self): - #implement your own iterator - return self - - def __next__(self): - #implement your own iterator - return None +from pathlib import Path + +''' +You can implement your own SampleGenerator +''' +class SampleGeneratorBase(object): + + + def __init__ (self, samples_path, debug, batch_size): + if samples_path is None: + raise Exception('samples_path is None') + + self.samples_path = Path(samples_path) + self.debug = debug + self.batch_size = 1 if self.debug else batch_size + + #overridable + def __iter__(self): + #implement your own iterator + return self + + def __next__(self): + #implement your own iterator + return None diff --git a/samplelib/SampleGeneratorFace.py b/samplelib/SampleGeneratorFace.py index 593f8e2..095d955 100644 --- a/samplelib/SampleGeneratorFace.py +++ b/samplelib/SampleGeneratorFace.py @@ -1,142 +1,142 @@ -import multiprocessing -import traceback - -import cv2 -import numpy as np - -from facelib import LandmarksProcessor -from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor, - SampleType) -from utils import iter_utils - - -''' -arg -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional) {} opts ] , - ... 
- ] -''' -class SampleGeneratorFace(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, sort_by_yaw=False, sort_by_yaw_target_samples_path=None, random_ct_samples_path=None, sample_process_options=SampleProcessor.Options(), output_sample_types=[], add_sample_idx=False, generators_count=2, generators_random_seed=None, **kwargs): - super().__init__(samples_path, debug, batch_size) - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - self.add_sample_idx = add_sample_idx - - if sort_by_yaw_target_samples_path is not None: - self.sample_type = SampleType.FACE_YAW_SORTED_AS_TARGET - elif sort_by_yaw: - self.sample_type = SampleType.FACE_YAW_SORTED - else: - self.sample_type = SampleType.FACE - - if generators_random_seed is not None and len(generators_random_seed) != generators_count: - raise ValueError("len(generators_random_seed) != generators_count") - - self.generators_random_seed = generators_random_seed - - samples = SampleLoader.load (self.sample_type, self.samples_path, sort_by_yaw_target_samples_path) - - ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path) if random_ct_samples_path is not None else None - self.random_ct_sample_chance = 100 - - if self.debug: - self.generators_count = 1 - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, samples, ct_samples) )] - else: - self.generators_count = min ( generators_count, len(samples) ) - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, samples[i::self.generators_count], ct_samples ) ) for i in range(self.generators_count) ] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, param ): - generator_id, samples, ct_samples = param - - if self.generators_random_seed is not None: - np.random.seed ( self.generators_random_seed[generator_id] ) - - samples_len = len(samples) - samples_idxs = [*range(samples_len)] - - ct_samples_len = len(ct_samples) if ct_samples is not None else 0 - - if len(samples_idxs) == 0: - raise ValueError('No training data provided.') - - if self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - if all ( [ samples[idx] == None for idx in samples_idxs] ): - raise ValueError('Not enough training data. 
Gather more faces!') - - if self.sample_type == SampleType.FACE: - shuffle_idxs = [] - elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - shuffle_idxs = [] - shuffle_idxs_2D = [[]]*samples_len - - while True: - batches = None - for n_batch in range(self.batch_size): - while True: - sample = None - - if self.sample_type == SampleType.FACE: - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle(shuffle_idxs) - - idx = shuffle_idxs.pop() - sample = samples[ idx ] - - elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle(shuffle_idxs) - - idx = shuffle_idxs.pop() - if samples[idx] != None: - if len(shuffle_idxs_2D[idx]) == 0: - a = shuffle_idxs_2D[idx] = [ *range(len(samples[idx])) ] - np.random.shuffle (a) - - idx2 = shuffle_idxs_2D[idx].pop() - sample = samples[idx][idx2] - - idx = (idx << 16) | (idx2 & 0xFFFF) - - if sample is not None: - try: - ct_sample=None - if ct_samples is not None: - if np.random.randint(100) < self.random_ct_sample_chance: - ct_sample=ct_samples[np.random.randint(ct_samples_len)] - - x = SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug, ct_sample=ct_sample) - except: - raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if type(x) != tuple and type(x) != list: - raise Exception('SampleProcessor.process returns NOT tuple/list') - - if batches is None: - batches = [ [] for _ in range(len(x)) ] - if self.add_sample_idx: - batches += [ [] ] - i_sample_idx = len(batches)-1 - - for i in range(len(x)): - batches[i].append ( x[i] ) - - if self.add_sample_idx: - batches[i_sample_idx].append (idx) - - break - yield [ np.array(batch) for batch in batches] +import multiprocessing +import traceback + +import cv2 +import numpy as np + +from facelib import LandmarksProcessor +from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor, + SampleType) +from utils import iter_utils + + +''' +arg +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional) {} opts ] , + ... 
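+  e.g. one illustrative entry, using the dict-style opts that
+  SampleProcessor.process reads (the concrete values here are assumptions):
+    { 'types': (SampleProcessor.Types.IMG_TRANSFORMED,
+                SampleProcessor.Types.FACE_TYPE_FULL,
+                SampleProcessor.Types.MODE_BGR),
+      'resolution': 128 }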
+ ] +''' +class SampleGeneratorFace(SampleGeneratorBase): + def __init__ (self, samples_path, debug, batch_size, sort_by_yaw=False, sort_by_yaw_target_samples_path=None, random_ct_samples_path=None, sample_process_options=SampleProcessor.Options(), output_sample_types=[], add_sample_idx=False, generators_count=2, generators_random_seed=None, **kwargs): + super().__init__(samples_path, debug, batch_size) + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + self.add_sample_idx = add_sample_idx + + if sort_by_yaw_target_samples_path is not None: + self.sample_type = SampleType.FACE_YAW_SORTED_AS_TARGET + elif sort_by_yaw: + self.sample_type = SampleType.FACE_YAW_SORTED + else: + self.sample_type = SampleType.FACE + + if generators_random_seed is not None and len(generators_random_seed) != generators_count: + raise ValueError("len(generators_random_seed) != generators_count") + + self.generators_random_seed = generators_random_seed + + samples = SampleLoader.load (self.sample_type, self.samples_path, sort_by_yaw_target_samples_path) + + ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path) if random_ct_samples_path is not None else None + self.random_ct_sample_chance = 100 + + if self.debug: + self.generators_count = 1 + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, samples, ct_samples) )] + else: + self.generators_count = min ( generators_count, len(samples) ) + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, samples[i::self.generators_count], ct_samples ) ) for i in range(self.generators_count) ] + + self.generator_counter = -1 + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, param ): + generator_id, samples, ct_samples = param + + if self.generators_random_seed is not None: + np.random.seed ( self.generators_random_seed[generator_id] ) + + samples_len = len(samples) + samples_idxs = [*range(samples_len)] + + ct_samples_len = len(ct_samples) if ct_samples is not None else 0 + + if len(samples_idxs) == 0: + raise ValueError('No training data provided.') + + if self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + if all ( [ samples[idx] == None for idx in samples_idxs] ): + raise ValueError('Not enough training data. 
Gather more faces!') + + if self.sample_type == SampleType.FACE: + shuffle_idxs = [] + elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + shuffle_idxs = [] + shuffle_idxs_2D = [[]]*samples_len + + while True: + batches = None + for n_batch in range(self.batch_size): + while True: + sample = None + + if self.sample_type == SampleType.FACE: + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle(shuffle_idxs) + + idx = shuffle_idxs.pop() + sample = samples[ idx ] + + elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle(shuffle_idxs) + + idx = shuffle_idxs.pop() + if samples[idx] != None: + if len(shuffle_idxs_2D[idx]) == 0: + a = shuffle_idxs_2D[idx] = [ *range(len(samples[idx])) ] + np.random.shuffle (a) + + idx2 = shuffle_idxs_2D[idx].pop() + sample = samples[idx][idx2] + + idx = (idx << 16) | (idx2 & 0xFFFF) + + if sample is not None: + try: + ct_sample=None + if ct_samples is not None: + if np.random.randint(100) < self.random_ct_sample_chance: + ct_sample=ct_samples[np.random.randint(ct_samples_len)] + + x = SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug, ct_sample=ct_sample) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if type(x) != tuple and type(x) != list: + raise Exception('SampleProcessor.process returns NOT tuple/list') + + if batches is None: + batches = [ [] for _ in range(len(x)) ] + if self.add_sample_idx: + batches += [ [] ] + i_sample_idx = len(batches)-1 + + for i in range(len(x)): + batches[i].append ( x[i] ) + + if self.add_sample_idx: + batches[i_sample_idx].append (idx) + + break + yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleGeneratorFaceTemporal.py b/samplelib/SampleGeneratorFaceTemporal.py index cf670d2..559cbb7 100644 --- a/samplelib/SampleGeneratorFaceTemporal.py +++ b/samplelib/SampleGeneratorFaceTemporal.py @@ -1,84 +1,84 @@ -import traceback -import numpy as np -import cv2 - -from utils import iter_utils - -from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase - -''' -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , - ... 
- ] -''' -class SampleGeneratorFaceTemporal(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], generators_count=2, **kwargs): - super().__init__(samples_path, debug, batch_size) - - self.temporal_image_count = temporal_image_count - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - - self.samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path) - - if self.debug: - self.generators_count = 1 - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] - else: - self.generators_count = min ( generators_count, len(self.samples) ) - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, i ) for i in range(self.generators_count) ] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, generator_id): - samples = self.samples - samples_len = len(samples) - if samples_len == 0: - raise ValueError('No training data provided.') - - mult_max = 1 - l = samples_len - (self.temporal_image_count-1)*mult_max + 1 - - samples_idxs = [ *range(l) ] [generator_id::self.generators_count] - - if len(samples_idxs) - self.temporal_image_count < 0: - raise ValueError('Not enough samples to fit temporal line.') - - shuffle_idxs = [] - - while True: - - batches = None - for n_batch in range(self.batch_size): - - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle (shuffle_idxs) - - idx = shuffle_idxs.pop() - - temporal_samples = [] - mult = np.random.randint(mult_max) - for i in range( self.temporal_image_count ): - sample = samples[ idx+i*mult ] - try: - temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) - except: - raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if batches is None: - batches = [ [] for _ in range(len(temporal_samples)) ] - - for i in range(len(temporal_samples)): - batches[i].append ( temporal_samples[i] ) - - yield [ np.array(batch) for batch in batches] +import traceback +import numpy as np +import cv2 + +from utils import iter_utils + +from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase + +''' +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , + ... 
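+  each element of a yielded batch stacks the outputs of temporal_image_count
+  consecutive frames: batch_func below calls SampleProcessor.process once per
+  frame and concatenates the per-frame outputs into one flat list.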
+ ] +''' +class SampleGeneratorFaceTemporal(SampleGeneratorBase): + def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], generators_count=2, **kwargs): + super().__init__(samples_path, debug, batch_size) + + self.temporal_image_count = temporal_image_count + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + + self.samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path) + + if self.debug: + self.generators_count = 1 + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] + else: + self.generators_count = min ( generators_count, len(self.samples) ) + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, i ) for i in range(self.generators_count) ] + + self.generator_counter = -1 + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, generator_id): + samples = self.samples + samples_len = len(samples) + if samples_len == 0: + raise ValueError('No training data provided.') + + mult_max = 1 + l = samples_len - (self.temporal_image_count-1)*mult_max + 1 + + samples_idxs = [ *range(l) ] [generator_id::self.generators_count] + + if len(samples_idxs) - self.temporal_image_count < 0: + raise ValueError('Not enough samples to fit temporal line.') + + shuffle_idxs = [] + + while True: + + batches = None + for n_batch in range(self.batch_size): + + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle (shuffle_idxs) + + idx = shuffle_idxs.pop() + + temporal_samples = [] + mult = np.random.randint(mult_max) + for i in range( self.temporal_image_count ): + sample = samples[ idx+i*mult ] + try: + temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if batches is None: + batches = [ [] for _ in range(len(temporal_samples)) ] + + for i in range(len(temporal_samples)): + batches[i].append ( temporal_samples[i] ) + + yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleGeneratorImageTemporal.py b/samplelib/SampleGeneratorImageTemporal.py index 190f98d..0e5b238 100644 --- a/samplelib/SampleGeneratorImageTemporal.py +++ b/samplelib/SampleGeneratorImageTemporal.py @@ -1,78 +1,78 @@ -import traceback -import numpy as np -import cv2 - -from utils import iter_utils - -from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase - -''' -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , - ... 
- ] -''' -class SampleGeneratorImageTemporal(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], **kwargs): - super().__init__(samples_path, debug, batch_size) - - self.temporal_image_count = temporal_image_count - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - - self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path) - - self.generator_samples = [ self.samples ] - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \ - [iter_utils.SubprocessGenerator ( self.batch_func, 0 )] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, generator_id): - samples = self.generator_samples[generator_id] - samples_len = len(samples) - if samples_len == 0: - raise ValueError('No training data provided.') - - mult_max = 4 - samples_sub_len = samples_len - (self.temporal_image_count-1)*mult_max - if samples_sub_len <= 0: - raise ValueError('Not enough samples to fit temporal line.') - - shuffle_idxs = [] - - while True: - - batches = None - for n_batch in range(self.batch_size): - - if len(shuffle_idxs) == 0: - shuffle_idxs = [ *range(samples_sub_len) ] - np.random.shuffle (shuffle_idxs) - - idx = shuffle_idxs.pop() - - temporal_samples = [] - mult = np.random.randint(mult_max) - for i in range( self.temporal_image_count ): - sample = samples[ idx+i*mult ] - try: - temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) - except: - raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if batches is None: - batches = [ [] for _ in range(len(temporal_samples)) ] - - for i in range(len(temporal_samples)): - batches[i].append ( temporal_samples[i] ) - - yield [ np.array(batch) for batch in batches] +import traceback +import numpy as np +import cv2 + +from utils import iter_utils + +from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase + +''' +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , + ... 
+ ] +''' +class SampleGeneratorImageTemporal(SampleGeneratorBase): + def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], **kwargs): + super().__init__(samples_path, debug, batch_size) + + self.temporal_image_count = temporal_image_count + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + + self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path) + + self.generator_samples = [ self.samples ] + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \ + [iter_utils.SubprocessGenerator ( self.batch_func, 0 )] + + self.generator_counter = -1 + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, generator_id): + samples = self.generator_samples[generator_id] + samples_len = len(samples) + if samples_len == 0: + raise ValueError('No training data provided.') + + mult_max = 4 + samples_sub_len = samples_len - (self.temporal_image_count-1)*mult_max + if samples_sub_len <= 0: + raise ValueError('Not enough samples to fit temporal line.') + + shuffle_idxs = [] + + while True: + + batches = None + for n_batch in range(self.batch_size): + + if len(shuffle_idxs) == 0: + shuffle_idxs = [ *range(samples_sub_len) ] + np.random.shuffle (shuffle_idxs) + + idx = shuffle_idxs.pop() + + temporal_samples = [] + mult = np.random.randint(mult_max) + for i in range( self.temporal_image_count ): + sample = samples[ idx+i*mult ] + try: + temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. 
Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if batches is None: + batches = [ [] for _ in range(len(temporal_samples)) ] + + for i in range(len(temporal_samples)): + batches[i].append ( temporal_samples[i] ) + + yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleLoader.py b/samplelib/SampleLoader.py index 762171f..42e48be 100644 --- a/samplelib/SampleLoader.py +++ b/samplelib/SampleLoader.py @@ -1,152 +1,152 @@ -import operator -import traceback -from enum import IntEnum -from pathlib import Path - -import cv2 -import numpy as np - -from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from utils import Path_utils -from utils.DFLJPG import DFLJPG -from utils.DFLPNG import DFLPNG - -from .Sample import Sample, SampleType - - -class SampleLoader: - cache = dict() - - @staticmethod - def load(sample_type, samples_path, target_samples_path=None): - cache = SampleLoader.cache - - if str(samples_path) not in cache.keys(): - cache[str(samples_path)] = [None]*SampleType.QTY - - datas = cache[str(samples_path)] - - if sample_type == SampleType.IMAGE: - if datas[sample_type] is None: - datas[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( Path_utils.get_image_paths(samples_path), "Loading") ] - - elif sample_type == SampleType.FACE: - if datas[sample_type] is None: - datas[sample_type] = SampleLoader.upgradeToFaceSamples( [ Sample(filename=filename) for filename in Path_utils.get_image_paths(samples_path) ] ) - - elif sample_type == SampleType.FACE_TEMPORAL_SORTED: - if datas[sample_type] is None: - datas[sample_type] = SampleLoader.upgradeToFaceTemporalSortedSamples( SampleLoader.load(SampleType.FACE, samples_path) ) - - elif sample_type == SampleType.FACE_YAW_SORTED: - if datas[sample_type] is None: - datas[sample_type] = SampleLoader.upgradeToFaceYawSortedSamples( SampleLoader.load(SampleType.FACE, samples_path) ) - - elif sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - if datas[sample_type] is None: - if target_samples_path is None: - raise Exception('target_samples_path is None for FACE_YAW_SORTED_AS_TARGET') - datas[sample_type] = SampleLoader.upgradeToFaceYawSortedAsTargetSamples( SampleLoader.load(SampleType.FACE_YAW_SORTED, samples_path), SampleLoader.load(SampleType.FACE_YAW_SORTED, target_samples_path) ) - - return datas[sample_type] - - @staticmethod - def upgradeToFaceSamples ( samples ): - sample_list = [] - - for s in io.progress_bar_generator(samples, "Loading"): - s_filename_path = Path(s.filename) - try: - if s_filename_path.suffix == '.png': - dflimg = DFLPNG.load ( str(s_filename_path) ) - elif s_filename_path.suffix == '.jpg': - dflimg = DFLJPG.load ( str(s_filename_path) ) - else: - dflimg = None - - if dflimg is None: - print ("%s is not a dfl image file required for training" % (s_filename_path.name) ) - continue - - landmarks = dflimg.get_landmarks() - pitch_yaw_roll = dflimg.get_pitch_yaw_roll() - if pitch_yaw_roll is None: - pitch_yaw_roll = LandmarksProcessor.estimate_pitch_yaw_roll(landmarks) - - - sample_list.append( s.copy_and_set(sample_type=SampleType.FACE, - face_type=FaceType.fromString (dflimg.get_face_type()), - shape=dflimg.get_shape(), - landmarks=landmarks, - ie_polys=dflimg.get_ie_polys(), - pitch_yaw_roll=pitch_yaw_roll, - source_filename=dflimg.get_source_filename(), - fanseg_mask_exist=dflimg.get_fanseg_mask() is not None, ) ) - except: - print ("Unable to load %s , error: %s" % (str(s_filename_path), traceback.format_exc() ) ) - - return 
sample_list - - @staticmethod - def upgradeToFaceTemporalSortedSamples( samples ): - new_s = [ (s, s.source_filename) for s in samples] - new_s = sorted(new_s, key=operator.itemgetter(1)) - - return [ s[0] for s in new_s] - - @staticmethod - def upgradeToFaceYawSortedSamples( samples ): - - lowest_yaw, highest_yaw = -1.0, 1.0 - gradations = 64 - diff_rot_per_grad = abs(highest_yaw-lowest_yaw) / gradations - - yaws_sample_list = [None]*gradations - - for i in io.progress_bar_generator(range(gradations), "Sorting"): - yaw = lowest_yaw + i*diff_rot_per_grad - next_yaw = lowest_yaw + (i+1)*diff_rot_per_grad - - yaw_samples = [] - for s in samples: - s_yaw = s.pitch_yaw_roll[1] - if (i == 0 and s_yaw < next_yaw) or \ - (i < gradations-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ - (i == gradations-1 and s_yaw >= yaw): - yaw_samples.append ( s.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED) ) - - if len(yaw_samples) > 0: - yaws_sample_list[i] = yaw_samples - - return yaws_sample_list - - @staticmethod - def upgradeToFaceYawSortedAsTargetSamples (s, t): - l = len(s) - if l != len(t): - raise Exception('upgradeToFaceYawSortedAsTargetSamples() s_len != t_len') - b = l // 2 - - s_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in s] ) == 1 )[:,0] - t_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in t] ) == 1 )[:,0] - - new_s = [None]*l - - for t_idx in t_idxs: - search_idxs = [] - for i in range(0,l): - search_idxs += [t_idx - i, (l-t_idx-1) - i, t_idx + i, (l-t_idx-1) + i] - - for search_idx in search_idxs: - if search_idx in s_idxs: - mirrored = ( t_idx != search_idx and ((t_idx < b and search_idx >= b) or (search_idx < b and t_idx >= b)) ) - new_s[t_idx] = [ sample.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED_AS_TARGET, - mirror=True, - pitch_yaw_roll=(sample.pitch_yaw_roll[0],-sample.pitch_yaw_roll[1],sample.pitch_yaw_roll[2]), - landmarks=LandmarksProcessor.mirror_landmarks (sample.landmarks, sample.shape[1] )) - for sample in s[search_idx] - ] if mirrored else s[search_idx] - break - - return new_s +import operator +import traceback +from enum import IntEnum +from pathlib import Path + +import cv2 +import numpy as np + +from facelib import FaceType, LandmarksProcessor +from interact import interact as io +from utils import Path_utils +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG + +from .Sample import Sample, SampleType + + +class SampleLoader: + cache = dict() + + @staticmethod + def load(sample_type, samples_path, target_samples_path=None): + cache = SampleLoader.cache + + if str(samples_path) not in cache.keys(): + cache[str(samples_path)] = [None]*SampleType.QTY + + datas = cache[str(samples_path)] + + if sample_type == SampleType.IMAGE: + if datas[sample_type] is None: + datas[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( Path_utils.get_image_paths(samples_path), "Loading") ] + + elif sample_type == SampleType.FACE: + if datas[sample_type] is None: + datas[sample_type] = SampleLoader.upgradeToFaceSamples( [ Sample(filename=filename) for filename in Path_utils.get_image_paths(samples_path) ] ) + + elif sample_type == SampleType.FACE_TEMPORAL_SORTED: + if datas[sample_type] is None: + datas[sample_type] = SampleLoader.upgradeToFaceTemporalSortedSamples( SampleLoader.load(SampleType.FACE, samples_path) ) + + elif sample_type == SampleType.FACE_YAW_SORTED: + if datas[sample_type] is None: + datas[sample_type] = SampleLoader.upgradeToFaceYawSortedSamples( SampleLoader.load(SampleType.FACE, 
samples_path) ) + + elif sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + if datas[sample_type] is None: + if target_samples_path is None: + raise Exception('target_samples_path is None for FACE_YAW_SORTED_AS_TARGET') + datas[sample_type] = SampleLoader.upgradeToFaceYawSortedAsTargetSamples( SampleLoader.load(SampleType.FACE_YAW_SORTED, samples_path), SampleLoader.load(SampleType.FACE_YAW_SORTED, target_samples_path) ) + + return datas[sample_type] + + @staticmethod + def upgradeToFaceSamples ( samples ): + sample_list = [] + + for s in io.progress_bar_generator(samples, "Loading"): + s_filename_path = Path(s.filename) + try: + if s_filename_path.suffix == '.png': + dflimg = DFLPNG.load ( str(s_filename_path) ) + elif s_filename_path.suffix == '.jpg': + dflimg = DFLJPG.load ( str(s_filename_path) ) + else: + dflimg = None + + if dflimg is None: + print ("%s is not a dfl image file required for training" % (s_filename_path.name) ) + continue + + landmarks = dflimg.get_landmarks() + pitch_yaw_roll = dflimg.get_pitch_yaw_roll() + if pitch_yaw_roll is None: + pitch_yaw_roll = LandmarksProcessor.estimate_pitch_yaw_roll(landmarks) + + + sample_list.append( s.copy_and_set(sample_type=SampleType.FACE, + face_type=FaceType.fromString (dflimg.get_face_type()), + shape=dflimg.get_shape(), + landmarks=landmarks, + ie_polys=dflimg.get_ie_polys(), + pitch_yaw_roll=pitch_yaw_roll, + source_filename=dflimg.get_source_filename(), + fanseg_mask_exist=dflimg.get_fanseg_mask() is not None, ) ) + except: + print ("Unable to load %s , error: %s" % (str(s_filename_path), traceback.format_exc() ) ) + + return sample_list + + @staticmethod + def upgradeToFaceTemporalSortedSamples( samples ): + new_s = [ (s, s.source_filename) for s in samples] + new_s = sorted(new_s, key=operator.itemgetter(1)) + + return [ s[0] for s in new_s] + + @staticmethod + def upgradeToFaceYawSortedSamples( samples ): + + lowest_yaw, highest_yaw = -1.0, 1.0 + gradations = 64 + diff_rot_per_grad = abs(highest_yaw-lowest_yaw) / gradations + + yaws_sample_list = [None]*gradations + + for i in io.progress_bar_generator(range(gradations), "Sorting"): + yaw = lowest_yaw + i*diff_rot_per_grad + next_yaw = lowest_yaw + (i+1)*diff_rot_per_grad + + yaw_samples = [] + for s in samples: + s_yaw = s.pitch_yaw_roll[1] + if (i == 0 and s_yaw < next_yaw) or \ + (i < gradations-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ + (i == gradations-1 and s_yaw >= yaw): + yaw_samples.append ( s.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED) ) + + if len(yaw_samples) > 0: + yaws_sample_list[i] = yaw_samples + + return yaws_sample_list + + @staticmethod + def upgradeToFaceYawSortedAsTargetSamples (s, t): + l = len(s) + if l != len(t): + raise Exception('upgradeToFaceYawSortedAsTargetSamples() s_len != t_len') + b = l // 2 + + s_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in s] ) == 1 )[:,0] + t_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in t] ) == 1 )[:,0] + + new_s = [None]*l + + for t_idx in t_idxs: + search_idxs = [] + for i in range(0,l): + search_idxs += [t_idx - i, (l-t_idx-1) - i, t_idx + i, (l-t_idx-1) + i] + + for search_idx in search_idxs: + if search_idx in s_idxs: + mirrored = ( t_idx != search_idx and ((t_idx < b and search_idx >= b) or (search_idx < b and t_idx >= b)) ) + new_s[t_idx] = [ sample.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED_AS_TARGET, + mirror=True, + pitch_yaw_roll=(sample.pitch_yaw_roll[0],-sample.pitch_yaw_roll[1],sample.pitch_yaw_roll[2]), + 
landmarks=LandmarksProcessor.mirror_landmarks (sample.landmarks, sample.shape[1] )) + for sample in s[search_idx] + ] if mirrored else s[search_idx] + break + + return new_s diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py index 0921e52..bab51c9 100644 --- a/samplelib/SampleProcessor.py +++ b/samplelib/SampleProcessor.py @@ -1,316 +1,316 @@ -import collections -from enum import IntEnum - -import cv2 -import numpy as np - -import imagelib -from facelib import FaceType, LandmarksProcessor - - -""" -output_sample_types = [ - {} opts, - ... - ] - -opts: - 'types' : (S,S,...,S) - where S: - 'IMG_SOURCE' - 'IMG_WARPED' - 'IMG_WARPED_TRANSFORMED'' - 'IMG_TRANSFORMED' - 'IMG_LANDMARKS_ARRAY' #currently unused - 'IMG_PITCH_YAW_ROLL' - - 'FACE_TYPE_HALF' - 'FACE_TYPE_FULL' - 'FACE_TYPE_HEAD' #currently unused - 'FACE_TYPE_AVATAR' #currently unused - - 'MODE_BGR' #BGR - 'MODE_G' #Grayscale - 'MODE_GGG' #3xGrayscale - 'MODE_M' #mask only - 'MODE_BGR_SHUFFLE' #BGR shuffle - - 'resolution' : N - 'motion_blur' : (chance_int, range) - chance 0..100 to apply to face (not mask), and range [1..3] where 3 is highest power of motion blur - 'apply_ct' : bool - 'normalize_tanh' : bool - -""" - -class SampleProcessor(object): - class Types(IntEnum): - NONE = 0 - - IMG_TYPE_BEGIN = 1 - IMG_SOURCE = 1 - IMG_WARPED = 2 - IMG_WARPED_TRANSFORMED = 3 - IMG_TRANSFORMED = 4 - IMG_LANDMARKS_ARRAY = 5 #currently unused - IMG_PITCH_YAW_ROLL = 6 - IMG_PITCH_YAW_ROLL_SIGMOID = 7 - IMG_TYPE_END = 10 - - FACE_TYPE_BEGIN = 10 - FACE_TYPE_HALF = 10 - FACE_TYPE_FULL = 11 - FACE_TYPE_HEAD = 12 #currently unused - FACE_TYPE_AVATAR = 13 #currently unused - FACE_TYPE_END = 20 - - MODE_BEGIN = 40 - MODE_BGR = 40 #BGR - MODE_G = 41 #Grayscale - MODE_GGG = 42 #3xGrayscale - MODE_M = 43 #mask only - MODE_BGR_SHUFFLE = 44 #BGR shuffle - MODE_END = 50 - - class Options(object): - - def __init__(self, random_flip = True, rotation_range=[-10,10], scale_range=[-0.05, 0.05], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): - self.random_flip = random_flip - self.rotation_range = rotation_range - self.scale_range = scale_range - self.tx_range = tx_range - self.ty_range = ty_range - - @staticmethod - def process (sample, sample_process_options, output_sample_types, debug, ct_sample=None): - SPTF = SampleProcessor.Types - - sample_bgr = sample.load_bgr() - ct_sample_bgr = None - ct_sample_mask = None - h,w,c = sample_bgr.shape - - is_face_sample = sample.landmarks is not None - - if debug and is_face_sample: - LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0)) - - params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range ) - - cached_images = collections.defaultdict(dict) - - sample_rnd_seed = np.random.randint(0x80000000) - - SPTF_FACETYPE_TO_FACETYPE = { SPTF.FACE_TYPE_HALF : FaceType.HALF, - SPTF.FACE_TYPE_FULL : FaceType.FULL, - SPTF.FACE_TYPE_HEAD : FaceType.HEAD, - SPTF.FACE_TYPE_AVATAR : FaceType.AVATAR } - - outputs = [] - for opts in output_sample_types: - - resolution = opts.get('resolution', 0) - types = opts.get('types', [] ) - - random_sub_res = opts.get('random_sub_res', 0) - normalize_std_dev = opts.get('normalize_std_dev', False) - normalize_vgg = opts.get('normalize_vgg', False) - motion_blur = opts.get('motion_blur', None) - apply_ct = opts.get('apply_ct', False) - 
normalize_tanh = opts.get('normalize_tanh', False) - - img_type = SPTF.NONE - target_face_type = SPTF.NONE - face_mask_type = SPTF.NONE - mode_type = SPTF.NONE - for t in types: - if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END: - img_type = t - elif t >= SPTF.FACE_TYPE_BEGIN and t < SPTF.FACE_TYPE_END: - target_face_type = t - elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END: - mode_type = t - - if img_type == SPTF.NONE: - raise ValueError ('expected IMG_ type') - - if img_type == SPTF.IMG_LANDMARKS_ARRAY: - l = sample.landmarks - l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 ) - l = np.clip(l, 0.0, 1.0) - img = l - elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: - pitch_yaw_roll = sample.pitch_yaw_roll - if pitch_yaw_roll is not None: - pitch, yaw, roll = pitch_yaw_roll - else: - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll (sample.landmarks) - if params['flip']: - yaw = -yaw - - if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: - pitch = (pitch+1.0) / 2.0 - yaw = (yaw+1.0) / 2.0 - roll = (roll+1.0) / 2.0 - - img = (pitch, yaw, roll) - else: - if mode_type == SPTF.NONE: - raise ValueError ('expected MODE_ type') - - img = cached_images.get(img_type, None) - if img is None: - - img = sample_bgr - mask = None - cur_sample = sample - - if is_face_sample: - if motion_blur is not None: - chance, mb_range = motion_blur - chance = np.clip(chance, 0, 100) - - if np.random.randint(100) < chance: - mb_range = [3,5,7,9][ : np.clip(mb_range, 0, 3)+1 ] - dim = mb_range[ np.random.randint(len(mb_range) ) ] - img = imagelib.LinearMotionBlur (img, dim, np.random.randint(180) ) - - mask = cur_sample.load_fanseg_mask() #using fanseg_mask if exist - - if mask is None: - mask = LandmarksProcessor.get_image_hull_mask (img.shape, cur_sample.landmarks) - - if cur_sample.ie_polys is not None: - cur_sample.ie_polys.overlay_mask(mask) - - warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED) - transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED) - flip = img_type != SPTF.IMG_WARPED - - img = imagelib.warp_by_params (params, img, warp, transform, flip, True) - if mask is not None: - mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)[...,np.newaxis] - img = np.concatenate( (img, mask ), -1 ) - - cached_images[img_type] = img - - if is_face_sample and target_face_type != SPTF.NONE: - ft = SPTF_FACETYPE_TO_FACETYPE[target_face_type] - if ft > sample.face_type: - raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' 
% (sample.filename, sample.face_type, ft) ) - img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, ft), (resolution,resolution), flags=cv2.INTER_CUBIC ) - else: - img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC ) - - if random_sub_res != 0: - sub_size = resolution - random_sub_res - rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res) - start_x = rnd_state.randint(sub_size+1) - start_y = rnd_state.randint(sub_size+1) - img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:] - - img = np.clip(img, 0, 1) - img_bgr = img[...,0:3] - img_mask = img[...,3:4] - - if apply_ct and ct_sample is not None: - if ct_sample_bgr is None: - ct_sample_bgr = ct_sample.load_bgr() - - ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) - - img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized) - img_bgr = np.clip( img_bgr, 0.0, 1.0) - - if normalize_std_dev: - img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) ) - elif normalize_vgg: - img_bgr = np.clip(img_bgr*255, 0, 255) - img_bgr[:,:,0] -= 103.939 - img_bgr[:,:,1] -= 116.779 - img_bgr[:,:,2] -= 123.68 - - if mode_type == SPTF.MODE_BGR: - img = img_bgr - elif mode_type == SPTF.MODE_BGR_SHUFFLE: - rnd_state = np.random.RandomState (sample_rnd_seed) - img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1) - elif mode_type == SPTF.MODE_G: - img = np.concatenate ( (np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1),img_mask) , -1 ) - elif mode_type == SPTF.MODE_GGG: - img = np.concatenate ( ( np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1), img_mask), -1) - elif mode_type == SPTF.MODE_M and is_face_sample: - img = img_mask - - if not debug: - if normalize_tanh: - img = np.clip (img * 2.0 - 1.0, -1.0, 1.0) - else: - img = np.clip (img, 0.0, 1.0) - - outputs.append ( img ) - - if debug: - result = [] - - for output in outputs: - if output.shape[2] < 4: - result += [output,] - elif output.shape[2] == 4: - result += [output[...,0:3]*output[...,3:4],] - - return result - else: - return outputs - -""" - close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None - close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None - - if debug and close_sample_bgr is not None: - LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0)) - RANDOM_CLOSE = 0x00000040, #currently unused - MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused - -if f & SPTF.RANDOM_CLOSE != 0: - img_type += 10 - elif f & SPTF.MORPH_TO_RANDOM_CLOSE != 0: - img_type += 20 -if img_type >= 10 and img_type <= 19: #RANDOM_CLOSE - img_type -= 10 - img = close_sample_bgr - cur_sample = close_sample - -elif img_type >= 20 and img_type <= 29: #MORPH_TO_RANDOM_CLOSE - img_type -= 20 - res = sample.shape[0] - - s_landmarks = sample.landmarks.copy() - d_landmarks = close_sample.landmarks.copy() - idxs = list(range(len(s_landmarks))) - #remove landmarks near boundaries - for i in idxs[:]: - s_l = s_landmarks[i] - d_l = d_landmarks[i] - if s_l[0] < 5 or s_l[1] < 5 or s_l[0] >= res-5 or s_l[1] >= res-5 or \ - d_l[0] < 5 or d_l[1] < 5 or d_l[0] >= res-5 or d_l[1] >= res-5: - idxs.remove(i) - #remove landmarks that close to each other in 5 dist - for landmarks in [s_landmarks, d_landmarks]: - for i in idxs[:]: - s_l = landmarks[i] - for j in idxs[:]: - if i == j: - 
continue - s_l_2 = landmarks[j] - diff_l = np.abs(s_l - s_l_2) - if np.sqrt(diff_l.dot(diff_l)) < 5: - idxs.remove(i) - break - s_landmarks = s_landmarks[idxs] - d_landmarks = d_landmarks[idxs] - s_landmarks = np.concatenate ( [s_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) - d_landmarks = np.concatenate ( [d_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) - img = imagelib.morph_by_points (sample_bgr, s_landmarks, d_landmarks) - cur_sample = close_sample -else: - """ +import collections +from enum import IntEnum + +import cv2 +import numpy as np + +import imagelib +from facelib import FaceType, LandmarksProcessor + + +""" +output_sample_types = [ + {} opts, + ... + ] + +opts: + 'types' : (S,S,...,S) + where S: + 'IMG_SOURCE' + 'IMG_WARPED' + 'IMG_WARPED_TRANSFORMED'' + 'IMG_TRANSFORMED' + 'IMG_LANDMARKS_ARRAY' #currently unused + 'IMG_PITCH_YAW_ROLL' + + 'FACE_TYPE_HALF' + 'FACE_TYPE_FULL' + 'FACE_TYPE_HEAD' #currently unused + 'FACE_TYPE_AVATAR' #currently unused + + 'MODE_BGR' #BGR + 'MODE_G' #Grayscale + 'MODE_GGG' #3xGrayscale + 'MODE_M' #mask only + 'MODE_BGR_SHUFFLE' #BGR shuffle + + 'resolution' : N + 'motion_blur' : (chance_int, range) - chance 0..100 to apply to face (not mask), and range [1..3] where 3 is highest power of motion blur + 'apply_ct' : bool + 'normalize_tanh' : bool + +""" + +class SampleProcessor(object): + class Types(IntEnum): + NONE = 0 + + IMG_TYPE_BEGIN = 1 + IMG_SOURCE = 1 + IMG_WARPED = 2 + IMG_WARPED_TRANSFORMED = 3 + IMG_TRANSFORMED = 4 + IMG_LANDMARKS_ARRAY = 5 #currently unused + IMG_PITCH_YAW_ROLL = 6 + IMG_PITCH_YAW_ROLL_SIGMOID = 7 + IMG_TYPE_END = 10 + + FACE_TYPE_BEGIN = 10 + FACE_TYPE_HALF = 10 + FACE_TYPE_FULL = 11 + FACE_TYPE_HEAD = 12 #currently unused + FACE_TYPE_AVATAR = 13 #currently unused + FACE_TYPE_END = 20 + + MODE_BEGIN = 40 + MODE_BGR = 40 #BGR + MODE_G = 41 #Grayscale + MODE_GGG = 42 #3xGrayscale + MODE_M = 43 #mask only + MODE_BGR_SHUFFLE = 44 #BGR shuffle + MODE_END = 50 + + class Options(object): + + def __init__(self, random_flip = True, rotation_range=[-10,10], scale_range=[-0.05, 0.05], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): + self.random_flip = random_flip + self.rotation_range = rotation_range + self.scale_range = scale_range + self.tx_range = tx_range + self.ty_range = ty_range + + @staticmethod + def process (sample, sample_process_options, output_sample_types, debug, ct_sample=None): + SPTF = SampleProcessor.Types + + sample_bgr = sample.load_bgr() + ct_sample_bgr = None + ct_sample_mask = None + h,w,c = sample_bgr.shape + + is_face_sample = sample.landmarks is not None + + if debug and is_face_sample: + LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0)) + + params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range ) + + cached_images = collections.defaultdict(dict) + + sample_rnd_seed = np.random.randint(0x80000000) + + SPTF_FACETYPE_TO_FACETYPE = { SPTF.FACE_TYPE_HALF : FaceType.HALF, + SPTF.FACE_TYPE_FULL : FaceType.FULL, + SPTF.FACE_TYPE_HEAD : FaceType.HEAD, + SPTF.FACE_TYPE_AVATAR : FaceType.AVATAR } + + outputs = [] + for opts in output_sample_types: + + resolution = opts.get('resolution', 0) + types = opts.get('types', 
[] ) + + random_sub_res = opts.get('random_sub_res', 0) + normalize_std_dev = opts.get('normalize_std_dev', False) + normalize_vgg = opts.get('normalize_vgg', False) + motion_blur = opts.get('motion_blur', None) + apply_ct = opts.get('apply_ct', False) + normalize_tanh = opts.get('normalize_tanh', False) + + img_type = SPTF.NONE + target_face_type = SPTF.NONE + face_mask_type = SPTF.NONE + mode_type = SPTF.NONE + for t in types: + if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END: + img_type = t + elif t >= SPTF.FACE_TYPE_BEGIN and t < SPTF.FACE_TYPE_END: + target_face_type = t + elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END: + mode_type = t + + if img_type == SPTF.NONE: + raise ValueError ('expected IMG_ type') + + if img_type == SPTF.IMG_LANDMARKS_ARRAY: + l = sample.landmarks + l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 ) + l = np.clip(l, 0.0, 1.0) + img = l + elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: + pitch_yaw_roll = sample.pitch_yaw_roll + if pitch_yaw_roll is not None: + pitch, yaw, roll = pitch_yaw_roll + else: + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll (sample.landmarks) + if params['flip']: + yaw = -yaw + + if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: + pitch = (pitch+1.0) / 2.0 + yaw = (yaw+1.0) / 2.0 + roll = (roll+1.0) / 2.0 + + img = (pitch, yaw, roll) + else: + if mode_type == SPTF.NONE: + raise ValueError ('expected MODE_ type') + + img = cached_images.get(img_type, None) + if img is None: + + img = sample_bgr + mask = None + cur_sample = sample + + if is_face_sample: + if motion_blur is not None: + chance, mb_range = motion_blur + chance = np.clip(chance, 0, 100) + + if np.random.randint(100) < chance: + mb_range = [3,5,7,9][ : np.clip(mb_range, 0, 3)+1 ] + dim = mb_range[ np.random.randint(len(mb_range) ) ] + img = imagelib.LinearMotionBlur (img, dim, np.random.randint(180) ) + + mask = cur_sample.load_fanseg_mask() #using fanseg_mask if exist + + if mask is None: + mask = LandmarksProcessor.get_image_hull_mask (img.shape, cur_sample.landmarks) + + if cur_sample.ie_polys is not None: + cur_sample.ie_polys.overlay_mask(mask) + + warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED) + transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED) + flip = img_type != SPTF.IMG_WARPED + + img = imagelib.warp_by_params (params, img, warp, transform, flip, True) + if mask is not None: + mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)[...,np.newaxis] + img = np.concatenate( (img, mask ), -1 ) + + cached_images[img_type] = img + + if is_face_sample and target_face_type != SPTF.NONE: + ft = SPTF_FACETYPE_TO_FACETYPE[target_face_type] + if ft > sample.face_type: + raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' 
% (sample.filename, sample.face_type, ft) ) + img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, ft), (resolution,resolution), flags=cv2.INTER_CUBIC ) + else: + img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC ) + + if random_sub_res != 0: + sub_size = resolution - random_sub_res + rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res) + start_x = rnd_state.randint(sub_size+1) + start_y = rnd_state.randint(sub_size+1) + img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:] + + img = np.clip(img, 0, 1) + img_bgr = img[...,0:3] + img_mask = img[...,3:4] + + if apply_ct and ct_sample is not None: + if ct_sample_bgr is None: + ct_sample_bgr = ct_sample.load_bgr() + + ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) + + img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized) + img_bgr = np.clip( img_bgr, 0.0, 1.0) + + if normalize_std_dev: + img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) ) + elif normalize_vgg: + img_bgr = np.clip(img_bgr*255, 0, 255) + img_bgr[:,:,0] -= 103.939 + img_bgr[:,:,1] -= 116.779 + img_bgr[:,:,2] -= 123.68 + + if mode_type == SPTF.MODE_BGR: + img = img_bgr + elif mode_type == SPTF.MODE_BGR_SHUFFLE: + rnd_state = np.random.RandomState (sample_rnd_seed) + img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1) + elif mode_type == SPTF.MODE_G: + img = np.concatenate ( (np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1),img_mask) , -1 ) + elif mode_type == SPTF.MODE_GGG: + img = np.concatenate ( ( np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1), img_mask), -1) + elif mode_type == SPTF.MODE_M and is_face_sample: + img = img_mask + + if not debug: + if normalize_tanh: + img = np.clip (img * 2.0 - 1.0, -1.0, 1.0) + else: + img = np.clip (img, 0.0, 1.0) + + outputs.append ( img ) + + if debug: + result = [] + + for output in outputs: + if output.shape[2] < 4: + result += [output,] + elif output.shape[2] == 4: + result += [output[...,0:3]*output[...,3:4],] + + return result + else: + return outputs + +""" + close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None + close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None + + if debug and close_sample_bgr is not None: + LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0)) + RANDOM_CLOSE = 0x00000040, #currently unused + MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused + +if f & SPTF.RANDOM_CLOSE != 0: + img_type += 10 + elif f & SPTF.MORPH_TO_RANDOM_CLOSE != 0: + img_type += 20 +if img_type >= 10 and img_type <= 19: #RANDOM_CLOSE + img_type -= 10 + img = close_sample_bgr + cur_sample = close_sample + +elif img_type >= 20 and img_type <= 29: #MORPH_TO_RANDOM_CLOSE + img_type -= 20 + res = sample.shape[0] + + s_landmarks = sample.landmarks.copy() + d_landmarks = close_sample.landmarks.copy() + idxs = list(range(len(s_landmarks))) + #remove landmarks near boundaries + for i in idxs[:]: + s_l = s_landmarks[i] + d_l = d_landmarks[i] + if s_l[0] < 5 or s_l[1] < 5 or s_l[0] >= res-5 or s_l[1] >= res-5 or \ + d_l[0] < 5 or d_l[1] < 5 or d_l[0] >= res-5 or d_l[1] >= res-5: + idxs.remove(i) + #remove landmarks that close to each other in 5 dist + for landmarks in [s_landmarks, d_landmarks]: + for i in idxs[:]: + s_l = landmarks[i] + for j in idxs[:]: + if i == j: + 
continue + s_l_2 = landmarks[j] + diff_l = np.abs(s_l - s_l_2) + if np.sqrt(diff_l.dot(diff_l)) < 5: + idxs.remove(i) + break + s_landmarks = s_landmarks[idxs] + d_landmarks = d_landmarks[idxs] + s_landmarks = np.concatenate ( [s_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) + d_landmarks = np.concatenate ( [d_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) + img = imagelib.morph_by_points (sample_bgr, s_landmarks, d_landmarks) + cur_sample = close_sample +else: + """ diff --git a/samplelib/__init__.py b/samplelib/__init__.py index d865394..ceefca2 100644 --- a/samplelib/__init__.py +++ b/samplelib/__init__.py @@ -1,8 +1,8 @@ -from .Sample import Sample -from .Sample import SampleType -from .SampleLoader import SampleLoader -from .SampleProcessor import SampleProcessor -from .SampleGeneratorBase import SampleGeneratorBase -from .SampleGeneratorFace import SampleGeneratorFace -from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal -from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal +from .Sample import Sample +from .Sample import SampleType +from .SampleLoader import SampleLoader +from .SampleProcessor import SampleProcessor +from .SampleGeneratorBase import SampleGeneratorBase +from .SampleGeneratorFace import SampleGeneratorFace +from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal +from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal diff --git a/utils/DFLJPG.py b/utils/DFLJPG.py index c06e96a..7be82a6 100644 --- a/utils/DFLJPG.py +++ b/utils/DFLJPG.py @@ -1,303 +1,303 @@ -import pickle -import struct - -import cv2 -import numpy as np - -from facelib import FaceType -from imagelib import IEPolys -from utils.struct_utils import * -from interact import interact as io - -class DFLJPG(object): - def __init__(self): - self.data = b"" - self.length = 0 - self.chunks = [] - self.dfl_dict = None - self.shape = (0,0,0) - - @staticmethod - def load_raw(filename): - try: - with open(filename, "rb") as f: - data = f.read() - except: - raise FileNotFoundError(filename) - - try: - inst = DFLJPG() - inst.data = data - inst.length = len(data) - inst_length = inst.length - chunks = [] - data_counter = 0 - while data_counter < inst_length: - chunk_m_l, chunk_m_h = struct.unpack ("BB", data[data_counter:data_counter+2]) - data_counter += 2 - - if chunk_m_l != 0xFF: - raise ValueError("No Valid JPG info") - - chunk_name = None - chunk_size = None - chunk_data = None - chunk_ex_data = None - is_unk_chunk = False - - if chunk_m_h & 0xF0 == 0xD0: - n = chunk_m_h & 0x0F - - if n >= 0 and n <= 7: - chunk_name = "RST%d" % (n) - chunk_size = 0 - elif n == 0x8: - chunk_name = "SOI" - chunk_size = 0 - if len(chunks) != 0: - raise Exception("") - elif n == 0x9: - chunk_name = "EOI" - chunk_size = 0 - elif n == 0xA: - chunk_name = "SOS" - elif n == 0xB: - chunk_name = "DQT" - elif n == 0xD: - chunk_name = "DRI" - chunk_size = 2 - else: - is_unk_chunk = True - elif chunk_m_h & 0xF0 == 0xC0: - n = chunk_m_h & 0x0F - if n == 0: - chunk_name = "SOF0" - elif n == 2: - chunk_name = "SOF2" - elif n == 4: - chunk_name = "DHT" - else: - is_unk_chunk = True - elif chunk_m_h & 0xF0 == 0xE0: - n = chunk_m_h & 0x0F - chunk_name = "APP%d" % (n) - else: - is_unk_chunk = True - - if is_unk_chunk: - raise ValueError("Unknown chunk %X" % (chunk_m_h) ) - - if chunk_size == None: #variable size - chunk_size, = 
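For reference, a hypothetical call into the restored SampleProcessor.process. The aligned-faces path is an assumption, and the two opts dicts just follow the output_sample_types docstring above: a warped+transformed BGR crop paired with a transformed mask-only target, which is the typical src/dst training pair shape.

    from pathlib import Path
    from samplelib import SampleLoader, SampleProcessor, SampleType

    SPTF = SampleProcessor.Types
    options = SampleProcessor.Options(random_flip=True, rotation_range=[-10, 10])

    output_sample_types = [
        # randomly warped + transformed BGR crop of the full face
        {'types': (SPTF.IMG_WARPED_TRANSFORMED, SPTF.FACE_TYPE_FULL, SPTF.MODE_BGR),
         'resolution': 128},
        # the matching mask-only target, transformed but not warped
        {'types': (SPTF.IMG_TRANSFORMED, SPTF.FACE_TYPE_FULL, SPTF.MODE_M),
         'resolution': 128},
    ]

    # 'workspace/data_src/aligned' is an illustrative path.
    samples = SampleLoader.load(SampleType.FACE, Path('workspace/data_src/aligned'))
    warped_bgr, target_mask = SampleProcessor.process(samples[0], options,
                                                      output_sample_types,
                                                      debug=False)
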
struct.unpack (">H", data[data_counter:data_counter+2]) - chunk_size -= 2 - data_counter += 2 - - if chunk_size > 0: - chunk_data = data[data_counter:data_counter+chunk_size] - data_counter += chunk_size - - if chunk_name == "SOS": - c = data_counter - while c < inst_length and (data[c] != 0xFF or data[c+1] != 0xD9): - c += 1 - - chunk_ex_data = data[data_counter:c] - data_counter = c - - chunks.append ({'name' : chunk_name, - 'm_h' : chunk_m_h, - 'data' : chunk_data, - 'ex_data' : chunk_ex_data, - }) - inst.chunks = chunks - - return inst - except Exception as e: - raise Exception ("Corrupted JPG file: %s" % (str(e))) - - @staticmethod - def load(filename): - try: - inst = DFLJPG.load_raw (filename) - inst.dfl_dict = None - - for chunk in inst.chunks: - if chunk['name'] == 'APP0': - d, c = chunk['data'], 0 - c, id, _ = struct_unpack (d, c, "=4sB") - - if id == b"JFIF": - c, ver_major, ver_minor, units, Xdensity, Ydensity, Xthumbnail, Ythumbnail = struct_unpack (d, c, "=BBBHHBB") - #if units == 0: - # inst.shape = (Ydensity, Xdensity, 3) - else: - raise Exception("Unknown jpeg ID: %s" % (id) ) - elif chunk['name'] == 'SOF0' or chunk['name'] == 'SOF2': - d, c = chunk['data'], 0 - c, precision, height, width = struct_unpack (d, c, ">BHH") - inst.shape = (height, width, 3) - - elif chunk['name'] == 'APP15': - if type(chunk['data']) == bytes: - inst.dfl_dict = pickle.loads(chunk['data']) - - if (inst.dfl_dict is not None): - if 'face_type' not in inst.dfl_dict: - inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) - - if 'fanseg_mask' in inst.dfl_dict: - fanseg_mask = inst.dfl_dict['fanseg_mask'] - if fanseg_mask is not None: - numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) - inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) - - if inst.dfl_dict == None: - return None - - return inst - except Exception as e: - print (e) - return None - - @staticmethod - def embed_data(filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - - if fanseg_mask is not None: - fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) - - ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) - - if ret and len(buf) < 60000: - fanseg_mask = buf - else: - io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) - fanseg_mask = None - - inst = DFLJPG.load_raw (filename) - inst.setDFLDictData ({ - 'face_type': face_type, - 'landmarks': landmarks, - 'ie_polys' : ie_polys.dump() if ie_polys is not None else None, - 'source_filename': source_filename, - 'source_rect': source_rect, - 'source_landmarks': source_landmarks, - 'image_to_face_mat': image_to_face_mat, - 'fanseg_mask' : fanseg_mask, - 'pitch_yaw_roll' : pitch_yaw_roll - }) - - try: - with open(filename, "wb") as f: - f.write ( inst.dump() ) - except: - raise Exception( 'cannot save %s' % (filename) ) - - def embed_and_set(self, filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - if face_type is None: face_type = self.get_face_type() - if landmarks is None: landmarks = self.get_landmarks() - if ie_polys is None: ie_polys = self.get_ie_polys() - if source_filename is None: source_filename = self.get_source_filename() - if source_rect is 
None: source_rect = self.get_source_rect() - if source_landmarks is None: source_landmarks = self.get_source_landmarks() - if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() - if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() - if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() - DFLJPG.embed_data (filename, face_type=face_type, - landmarks=landmarks, - ie_polys=ie_polys, - source_filename=source_filename, - source_rect=source_rect, - source_landmarks=source_landmarks, - image_to_face_mat=image_to_face_mat, - fanseg_mask=fanseg_mask, - pitch_yaw_roll=pitch_yaw_roll) - def remove_fanseg_mask(self): - self.dfl_dict['fanseg_mask'] = None - - def dump(self): - data = b"" - - for chunk in self.chunks: - data += struct.pack ("BB", 0xFF, chunk['m_h'] ) - chunk_data = chunk['data'] - if chunk_data is not None: - data += struct.pack (">H", len(chunk_data)+2 ) - data += chunk_data - - chunk_ex_data = chunk['ex_data'] - if chunk_ex_data is not None: - data += chunk_ex_data - - return data - - def get_shape(self): - return self.shape - - def get_height(self): - for chunk in self.chunks: - if type(chunk) == IHDR: - return chunk.height - return 0 - - def getDFLDictData(self): - return self.dfl_dict - - def setDFLDictData (self, dict_data=None): - self.dfl_dict = dict_data - - for chunk in self.chunks: - if chunk['name'] == 'APP15': - self.chunks.remove(chunk) - break - - last_app_chunk = 0 - for i, chunk in enumerate (self.chunks): - if chunk['m_h'] & 0xF0 == 0xE0: - last_app_chunk = i - - dflchunk = {'name' : 'APP15', - 'm_h' : 0xEF, - 'data' : pickle.dumps(dict_data), - 'ex_data' : None, - } - self.chunks.insert (last_app_chunk+1, dflchunk) - - def get_face_type(self): return self.dfl_dict['face_type'] - def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) - def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) - def get_source_filename(self): return self.dfl_dict['source_filename'] - def get_source_rect(self): return self.dfl_dict['source_rect'] - def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) - def get_image_to_face_mat(self): - mat = self.dfl_dict.get ('image_to_face_mat', None) - if mat is not None: - return np.array (mat) - return None - def get_fanseg_mask(self): - fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) - if fanseg_mask is not None: - return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] - return None - def get_pitch_yaw_roll(self): - return self.dfl_dict.get ('pitch_yaw_roll', None) - +import pickle +import struct + +import cv2 +import numpy as np + +from facelib import FaceType +from imagelib import IEPolys +from utils.struct_utils import * +from interact import interact as io + +class DFLJPG(object): + def __init__(self): + self.data = b"" + self.length = 0 + self.chunks = [] + self.dfl_dict = None + self.shape = (0,0,0) + + @staticmethod + def load_raw(filename): + try: + with open(filename, "rb") as f: + data = f.read() + except: + raise FileNotFoundError(filename) + + try: + inst = DFLJPG() + inst.data = data + inst.length = len(data) + inst_length = inst.length + chunks = [] + data_counter = 0 + while data_counter < inst_length: + chunk_m_l, chunk_m_h = struct.unpack ("BB", data[data_counter:data_counter+2]) + data_counter += 2 + + if chunk_m_l != 0xFF: + raise ValueError("No Valid JPG info") + + chunk_name = None + chunk_size = None + chunk_data = None + chunk_ex_data = None + is_unk_chunk = False + + if 
chunk_m_h & 0xF0 == 0xD0: + n = chunk_m_h & 0x0F + + if n >= 0 and n <= 7: + chunk_name = "RST%d" % (n) + chunk_size = 0 + elif n == 0x8: + chunk_name = "SOI" + chunk_size = 0 + if len(chunks) != 0: + raise Exception("") + elif n == 0x9: + chunk_name = "EOI" + chunk_size = 0 + elif n == 0xA: + chunk_name = "SOS" + elif n == 0xB: + chunk_name = "DQT" + elif n == 0xD: + chunk_name = "DRI" + chunk_size = 2 + else: + is_unk_chunk = True + elif chunk_m_h & 0xF0 == 0xC0: + n = chunk_m_h & 0x0F + if n == 0: + chunk_name = "SOF0" + elif n == 2: + chunk_name = "SOF2" + elif n == 4: + chunk_name = "DHT" + else: + is_unk_chunk = True + elif chunk_m_h & 0xF0 == 0xE0: + n = chunk_m_h & 0x0F + chunk_name = "APP%d" % (n) + else: + is_unk_chunk = True + + if is_unk_chunk: + raise ValueError("Unknown chunk %X" % (chunk_m_h) ) + + if chunk_size == None: #variable size + chunk_size, = struct.unpack (">H", data[data_counter:data_counter+2]) + chunk_size -= 2 + data_counter += 2 + + if chunk_size > 0: + chunk_data = data[data_counter:data_counter+chunk_size] + data_counter += chunk_size + + if chunk_name == "SOS": + c = data_counter + while c < inst_length and (data[c] != 0xFF or data[c+1] != 0xD9): + c += 1 + + chunk_ex_data = data[data_counter:c] + data_counter = c + + chunks.append ({'name' : chunk_name, + 'm_h' : chunk_m_h, + 'data' : chunk_data, + 'ex_data' : chunk_ex_data, + }) + inst.chunks = chunks + + return inst + except Exception as e: + raise Exception ("Corrupted JPG file: %s" % (str(e))) + + @staticmethod + def load(filename): + try: + inst = DFLJPG.load_raw (filename) + inst.dfl_dict = None + + for chunk in inst.chunks: + if chunk['name'] == 'APP0': + d, c = chunk['data'], 0 + c, id, _ = struct_unpack (d, c, "=4sB") + + if id == b"JFIF": + c, ver_major, ver_minor, units, Xdensity, Ydensity, Xthumbnail, Ythumbnail = struct_unpack (d, c, "=BBBHHBB") + #if units == 0: + # inst.shape = (Ydensity, Xdensity, 3) + else: + raise Exception("Unknown jpeg ID: %s" % (id) ) + elif chunk['name'] == 'SOF0' or chunk['name'] == 'SOF2': + d, c = chunk['data'], 0 + c, precision, height, width = struct_unpack (d, c, ">BHH") + inst.shape = (height, width, 3) + + elif chunk['name'] == 'APP15': + if type(chunk['data']) == bytes: + inst.dfl_dict = pickle.loads(chunk['data']) + + if (inst.dfl_dict is not None): + if 'face_type' not in inst.dfl_dict: + inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) + + if 'fanseg_mask' in inst.dfl_dict: + fanseg_mask = inst.dfl_dict['fanseg_mask'] + if fanseg_mask is not None: + numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) + inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) + + if inst.dfl_dict == None: + return None + + return inst + except Exception as e: + print (e) + return None + + @staticmethod + def embed_data(filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + + if fanseg_mask is not None: + fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) + + ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) + + if ret and len(buf) < 60000: + fanseg_mask = buf + else: + io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) + fanseg_mask = None + + inst = DFLJPG.load_raw (filename) + inst.setDFLDictData ({ + 'face_type': face_type, + 'landmarks': landmarks, + 'ie_polys' : ie_polys.dump() if ie_polys is 
not None else None, + 'source_filename': source_filename, + 'source_rect': source_rect, + 'source_landmarks': source_landmarks, + 'image_to_face_mat': image_to_face_mat, + 'fanseg_mask' : fanseg_mask, + 'pitch_yaw_roll' : pitch_yaw_roll + }) + + try: + with open(filename, "wb") as f: + f.write ( inst.dump() ) + except: + raise Exception( 'cannot save %s' % (filename) ) + + def embed_and_set(self, filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + if face_type is None: face_type = self.get_face_type() + if landmarks is None: landmarks = self.get_landmarks() + if ie_polys is None: ie_polys = self.get_ie_polys() + if source_filename is None: source_filename = self.get_source_filename() + if source_rect is None: source_rect = self.get_source_rect() + if source_landmarks is None: source_landmarks = self.get_source_landmarks() + if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() + if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() + if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() + DFLJPG.embed_data (filename, face_type=face_type, + landmarks=landmarks, + ie_polys=ie_polys, + source_filename=source_filename, + source_rect=source_rect, + source_landmarks=source_landmarks, + image_to_face_mat=image_to_face_mat, + fanseg_mask=fanseg_mask, + pitch_yaw_roll=pitch_yaw_roll) + def remove_fanseg_mask(self): + self.dfl_dict['fanseg_mask'] = None + + def dump(self): + data = b"" + + for chunk in self.chunks: + data += struct.pack ("BB", 0xFF, chunk['m_h'] ) + chunk_data = chunk['data'] + if chunk_data is not None: + data += struct.pack (">H", len(chunk_data)+2 ) + data += chunk_data + + chunk_ex_data = chunk['ex_data'] + if chunk_ex_data is not None: + data += chunk_ex_data + + return data + + def get_shape(self): + return self.shape + + def get_height(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + return chunk.height + return 0 + + def getDFLDictData(self): + return self.dfl_dict + + def setDFLDictData (self, dict_data=None): + self.dfl_dict = dict_data + + for chunk in self.chunks: + if chunk['name'] == 'APP15': + self.chunks.remove(chunk) + break + + last_app_chunk = 0 + for i, chunk in enumerate (self.chunks): + if chunk['m_h'] & 0xF0 == 0xE0: + last_app_chunk = i + + dflchunk = {'name' : 'APP15', + 'm_h' : 0xEF, + 'data' : pickle.dumps(dict_data), + 'ex_data' : None, + } + self.chunks.insert (last_app_chunk+1, dflchunk) + + def get_face_type(self): return self.dfl_dict['face_type'] + def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) + def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) + def get_source_filename(self): return self.dfl_dict['source_filename'] + def get_source_rect(self): return self.dfl_dict['source_rect'] + def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) + def get_image_to_face_mat(self): + mat = self.dfl_dict.get ('image_to_face_mat', None) + if mat is not None: + return np.array (mat) + return None + def get_fanseg_mask(self): + fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) + if fanseg_mask is not None: + return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] + return None + def get_pitch_yaw_roll(self): + return self.dfl_dict.get ('pitch_yaw_roll', None) + diff --git a/utils/DFLPNG.py b/utils/DFLPNG.py index d1fb76e..daff714 
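The DFLJPG loader above walks raw JPEG markers by hand. As a standalone cross-check, the sketch below performs the same walk but only reports segment names; list_jpeg_segments and the input path are illustrative, and like the loader it scans SOS entropy data byte-wise with no special restart-marker handling.

    import struct

    def list_jpeg_segments(path):
        with open(path, "rb") as f:
            data = f.read()
        pos, segments = 0, []
        while pos < len(data):
            marker_low, marker_high = struct.unpack("BB", data[pos:pos + 2])
            pos += 2
            if marker_low != 0xFF:
                raise ValueError("not a valid JPEG marker stream")
            if marker_high in (0xD8, 0xD9):  # SOI / EOI carry no payload
                segments.append("SOI" if marker_high == 0xD8 else "EOI")
                continue
            # The two length bytes count themselves, hence pos += size below.
            size, = struct.unpack(">H", data[pos:pos + 2])
            if marker_high & 0xF0 == 0xE0:   # APPn: DFL metadata lives in APP15
                segments.append("APP%d" % (marker_high & 0x0F))
            else:
                segments.append("0x%02X" % marker_high)
            pos += size
            if marker_high == 0xDA:          # SOS: skip entropy-coded data to EOI
                while pos < len(data) - 1 and not (data[pos] == 0xFF and data[pos + 1] == 0xD9):
                    pos += 1
        return segments

    print(list_jpeg_segments("workspace/data_dst/aligned/00001_0.jpg"))  # hypothetical file
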
100644 --- a/utils/DFLPNG.py +++ b/utils/DFLPNG.py @@ -1,410 +1,410 @@ -import pickle -import string -import struct -import zlib - -import cv2 -import numpy as np - -from facelib import FaceType -from imagelib import IEPolys - -PNG_HEADER = b"\x89PNG\r\n\x1a\n" - -class Chunk(object): - def __init__(self, name=None, data=None): - self.length = 0 - self.crc = 0 - self.name = name if name else "noNe" - self.data = data if data else b"" - - @classmethod - def load(cls, data): - """Load a chunk including header and footer""" - inst = cls() - if len(data) < 12: - msg = "Chunk-data too small" - raise ValueError(msg) - - # chunk header & data - (inst.length, raw_name) = struct.unpack("!I4s", data[0:8]) - inst.data = data[8:-4] - inst.verify_length() - inst.name = raw_name.decode("ascii") - inst.verify_name() - - # chunk crc - inst.crc = struct.unpack("!I", data[8+inst.length:8+inst.length+4])[0] - inst.verify_crc() - - return inst - - def dump(self, auto_crc=True, auto_length=True): - """Return the chunk including header and footer""" - if auto_length: self.update_length() - if auto_crc: self.update_crc() - self.verify_name() - return struct.pack("!I", self.length) + self.get_raw_name() + self.data + struct.pack("!I", self.crc) - - def verify_length(self): - if len(self.data) != self.length: - msg = "Data length ({}) does not match length in chunk header ({})".format(len(self.data), self.length) - raise ValueError(msg) - return True - - def verify_name(self): - for c in self.name: - if c not in string.ascii_letters: - msg = "Invalid character in chunk name: {}".format(repr(self.name)) - raise ValueError(msg) - return True - - def verify_crc(self): - calculated_crc = self.get_crc() - if self.crc != calculated_crc: - msg = "CRC mismatch: {:08X} (header), {:08X} (calculated)".format(self.crc, calculated_crc) - raise ValueError(msg) - return True - - def update_length(self): - self.length = len(self.data) - - def update_crc(self): - self.crc = self.get_crc() - - def get_crc(self): - return zlib.crc32(self.get_raw_name() + self.data) - - def get_raw_name(self): - return self.name if isinstance(self.name, bytes) else self.name.encode("ascii") - - # name helper methods - - def ancillary(self, set=None): - """Set and get ancillary=True/critical=False bit""" - if set is True: - self.name[0] = self.name[0].lower() - elif set is False: - self.name[0] = self.name[0].upper() - return self.name[0].islower() - - def private(self, set=None): - """Set and get private=True/public=False bit""" - if set is True: - self.name[1] = self.name[1].lower() - elif set is False: - self.name[1] = self.name[1].upper() - return self.name[1].islower() - - def reserved(self, set=None): - """Set and get reserved_valid=True/invalid=False bit""" - if set is True: - self.name[2] = self.name[2].upper() - elif set is False: - self.name[2] = self.name[2].lower() - return self.name[2].isupper() - - def safe_to_copy(self, set=None): - """Set and get save_to_copy=True/unsafe=False bit""" - if set is True: - self.name[3] = self.name[3].lower() - elif set is False: - self.name[3] = self.name[3].upper() - return self.name[3].islower() - - def __str__(self): - return "".format(**self.__dict__) - -class IHDR(Chunk): - """IHDR Chunk - width, height, bit_depth, color_type, compression_method, - filter_method, interlace_method contain the data extracted - from the chunk. Modify those and use and build() to recreate - the chunk. 
Valid values for bit_depth depend on the color_type - and can be looked up in color_types or in the PNG specification - - See: - http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR - """ - # color types with name & allowed bit depths - COLOR_TYPE_GRAY = 0 - COLOR_TYPE_RGB = 2 - COLOR_TYPE_PLTE = 3 - COLOR_TYPE_GRAYA = 4 - COLOR_TYPE_RGBA = 6 - color_types = { - COLOR_TYPE_GRAY: ("Grayscale", (1,2,4,8,16)), - COLOR_TYPE_RGB: ("RGB", (8,16)), - COLOR_TYPE_PLTE: ("Palette", (1,2,4,8)), - COLOR_TYPE_GRAYA: ("Greyscale+Alpha", (8,16)), - COLOR_TYPE_RGBA: ("RGBA", (8,16)), - } - - def __init__(self, width=0, height=0, bit_depth=8, color_type=2, \ - compression_method=0, filter_method=0, interlace_method=0): - self.width = width - self.height = height - self.bit_depth = bit_depth - self.color_type = color_type - self.compression_method = compression_method - self.filter_method = filter_method - self.interlace_method = interlace_method - super().__init__("IHDR") - - @classmethod - def load(cls, data): - inst = super().load(data) - fields = struct.unpack("!IIBBBBB", inst.data) - inst.width = fields[0] - inst.height = fields[1] - inst.bit_depth = fields[2] # per channel - inst.color_type = fields[3] # see specs - inst.compression_method = fields[4] # always 0(=deflate/inflate) - inst.filter_method = fields[5] # always 0(=adaptive filtering with 5 methods) - inst.interlace_method = fields[6] # 0(=no interlace) or 1(=Adam7 interlace) - return inst - - def dump(self): - self.data = struct.pack("!IIBBBBB", \ - self.width, self.height, self.bit_depth, self.color_type, \ - self.compression_method, self.filter_method, self.interlace_method) - return super().dump() - - def __str__(self): - return "" \ - .format(self.color_types[self.color_type][0], **self.__dict__) - -class IEND(Chunk): - def __init__(self): - super().__init__("IEND") - - def dump(self): - if len(self.data) != 0: - msg = "IEND has data which is not allowed" - raise ValueError(msg) - if self.length != 0: - msg = "IEND data lenght is not 0 which is not allowed" - raise ValueError(msg) - return super().dump() - - def __str__(self): - return "".format(**self.__dict__) - -class DFLChunk(Chunk): - def __init__(self, dict_data=None): - super().__init__("fcWp") - self.dict_data = dict_data - - def setDictData(self, dict_data): - self.dict_data = dict_data - - def getDictData(self): - return self.dict_data - - @classmethod - def load(cls, data): - inst = super().load(data) - inst.dict_data = pickle.loads( inst.data ) - return inst - - def dump(self): - self.data = pickle.dumps (self.dict_data) - return super().dump() - -chunk_map = { - b"IHDR": IHDR, - b"fcWp": DFLChunk, - b"IEND": IEND -} - -class DFLPNG(object): - def __init__(self): - self.data = b"" - self.length = 0 - self.chunks = [] - self.dfl_dict = None - - @staticmethod - def load_raw(filename): - try: - with open(filename, "rb") as f: - data = f.read() - except: - raise FileNotFoundError(filename) - - inst = DFLPNG() - inst.data = data - inst.length = len(data) - - if data[0:8] != PNG_HEADER: - msg = "No Valid PNG header" - raise ValueError(msg) - - chunk_start = 8 - while chunk_start < inst.length: - (chunk_length, chunk_name) = struct.unpack("!I4s", data[chunk_start:chunk_start+8]) - chunk_end = chunk_start + chunk_length + 12 - - chunk = chunk_map.get(chunk_name, Chunk).load(data[chunk_start:chunk_end]) - inst.chunks.append(chunk) - chunk_start = chunk_end - - return inst - - @staticmethod - def load(filename): - try: - inst = DFLPNG.load_raw (filename) - inst.dfl_dict = 
inst.getDFLDictData() - - if inst.dfl_dict is not None: - if 'face_type' not in inst.dfl_dict: - inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) - - if 'fanseg_mask' in inst.dfl_dict: - fanseg_mask = inst.dfl_dict['fanseg_mask'] - if fanseg_mask is not None: - numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) - inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) - - if inst.dfl_dict == None: - return None - - return inst - except Exception as e: - print(e) - return None - - @staticmethod - def embed_data(filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - - if fanseg_mask is not None: - fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) - - ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) - - if ret and len(buf) < 60000: - fanseg_mask = buf - else: - io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) - fanseg_mask = None - - inst = DFLPNG.load_raw (filename) - inst.setDFLDictData ({ - 'face_type': face_type, - 'landmarks': landmarks, - 'ie_polys' : ie_polys.dump() if ie_polys is not None else None, - 'source_filename': source_filename, - 'source_rect': source_rect, - 'source_landmarks': source_landmarks, - 'image_to_face_mat':image_to_face_mat, - 'fanseg_mask' : fanseg_mask, - 'pitch_yaw_roll' : pitch_yaw_roll - }) - - try: - with open(filename, "wb") as f: - f.write ( inst.dump() ) - except: - raise Exception( 'cannot save %s' % (filename) ) - - def embed_and_set(self, filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - if face_type is None: face_type = self.get_face_type() - if landmarks is None: landmarks = self.get_landmarks() - if ie_polys is None: ie_polys = self.get_ie_polys() - if source_filename is None: source_filename = self.get_source_filename() - if source_rect is None: source_rect = self.get_source_rect() - if source_landmarks is None: source_landmarks = self.get_source_landmarks() - if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() - if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() - if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() - DFLPNG.embed_data (filename, face_type=face_type, - landmarks=landmarks, - ie_polys=ie_polys, - source_filename=source_filename, - source_rect=source_rect, - source_landmarks=source_landmarks, - image_to_face_mat=image_to_face_mat, - fanseg_mask=fanseg_mask, - pitch_yaw_roll=pitch_yaw_roll) - - def remove_fanseg_mask(self): - self.dfl_dict['fanseg_mask'] = None - - def dump(self): - data = PNG_HEADER - for chunk in self.chunks: - data += chunk.dump() - return data - - def get_shape(self): - for chunk in self.chunks: - if type(chunk) == IHDR: - c = 3 if chunk.color_type == IHDR.COLOR_TYPE_RGB else 4 - w = chunk.width - h = chunk.height - return (h,w,c) - return (0,0,0) - - def get_height(self): - for chunk in self.chunks: - if type(chunk) == IHDR: - return chunk.height - return 0 - - def getDFLDictData(self): - for chunk in self.chunks: - if type(chunk) == DFLChunk: - return chunk.getDictData() - return None - - def setDFLDictData (self, dict_data=None): - for chunk in self.chunks: - if type(chunk) == DFLChunk: - 
self.chunks.remove(chunk) - break - - if not dict_data is None: - chunk = DFLChunk(dict_data) - self.chunks.insert(-1, chunk) - - def get_face_type(self): return self.dfl_dict['face_type'] - def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) - def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) - def get_source_filename(self): return self.dfl_dict['source_filename'] - def get_source_rect(self): return self.dfl_dict['source_rect'] - def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) - def get_image_to_face_mat(self): - mat = self.dfl_dict.get ('image_to_face_mat', None) - if mat is not None: - return np.array (mat) - return None - def get_fanseg_mask(self): - fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) - if fanseg_mask is not None: - return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] - return None - def get_pitch_yaw_roll(self): - return self.dfl_dict.get ('pitch_yaw_roll', None) - def __str__(self): - return "".format(len(self.chunks), **self.__dict__) +import pickle +import string +import struct +import zlib + +import cv2 +import numpy as np + +from facelib import FaceType +from imagelib import IEPolys + +PNG_HEADER = b"\x89PNG\r\n\x1a\n" + +class Chunk(object): + def __init__(self, name=None, data=None): + self.length = 0 + self.crc = 0 + self.name = name if name else "noNe" + self.data = data if data else b"" + + @classmethod + def load(cls, data): + """Load a chunk including header and footer""" + inst = cls() + if len(data) < 12: + msg = "Chunk-data too small" + raise ValueError(msg) + + # chunk header & data + (inst.length, raw_name) = struct.unpack("!I4s", data[0:8]) + inst.data = data[8:-4] + inst.verify_length() + inst.name = raw_name.decode("ascii") + inst.verify_name() + + # chunk crc + inst.crc = struct.unpack("!I", data[8+inst.length:8+inst.length+4])[0] + inst.verify_crc() + + return inst + + def dump(self, auto_crc=True, auto_length=True): + """Return the chunk including header and footer""" + if auto_length: self.update_length() + if auto_crc: self.update_crc() + self.verify_name() + return struct.pack("!I", self.length) + self.get_raw_name() + self.data + struct.pack("!I", self.crc) + + def verify_length(self): + if len(self.data) != self.length: + msg = "Data length ({}) does not match length in chunk header ({})".format(len(self.data), self.length) + raise ValueError(msg) + return True + + def verify_name(self): + for c in self.name: + if c not in string.ascii_letters: + msg = "Invalid character in chunk name: {}".format(repr(self.name)) + raise ValueError(msg) + return True + + def verify_crc(self): + calculated_crc = self.get_crc() + if self.crc != calculated_crc: + msg = "CRC mismatch: {:08X} (header), {:08X} (calculated)".format(self.crc, calculated_crc) + raise ValueError(msg) + return True + + def update_length(self): + self.length = len(self.data) + + def update_crc(self): + self.crc = self.get_crc() + + def get_crc(self): + return zlib.crc32(self.get_raw_name() + self.data) + + def get_raw_name(self): + return self.name if isinstance(self.name, bytes) else self.name.encode("ascii") + + # name helper methods + + def ancillary(self, set=None): + """Set and get ancillary=True/critical=False bit""" + if set is True: + self.name[0] = self.name[0].lower() + elif set is False: + self.name[0] = self.name[0].upper() + return self.name[0].islower() + + def private(self, set=None): + """Set and get private=True/public=False bit""" + if set is True: + 
self.name[1] = self.name[1].lower() + elif set is False: + self.name[1] = self.name[1].upper() + return self.name[1].islower() + + def reserved(self, set=None): + """Set and get reserved_valid=True/invalid=False bit""" + if set is True: + self.name[2] = self.name[2].upper() + elif set is False: + self.name[2] = self.name[2].lower() + return self.name[2].isupper() + + def safe_to_copy(self, set=None): + """Set and get save_to_copy=True/unsafe=False bit""" + if set is True: + self.name[3] = self.name[3].lower() + elif set is False: + self.name[3] = self.name[3].upper() + return self.name[3].islower() + + def __str__(self): + return "".format(**self.__dict__) + +class IHDR(Chunk): + """IHDR Chunk + width, height, bit_depth, color_type, compression_method, + filter_method, interlace_method contain the data extracted + from the chunk. Modify those and use and build() to recreate + the chunk. Valid values for bit_depth depend on the color_type + and can be looked up in color_types or in the PNG specification + + See: + http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR + """ + # color types with name & allowed bit depths + COLOR_TYPE_GRAY = 0 + COLOR_TYPE_RGB = 2 + COLOR_TYPE_PLTE = 3 + COLOR_TYPE_GRAYA = 4 + COLOR_TYPE_RGBA = 6 + color_types = { + COLOR_TYPE_GRAY: ("Grayscale", (1,2,4,8,16)), + COLOR_TYPE_RGB: ("RGB", (8,16)), + COLOR_TYPE_PLTE: ("Palette", (1,2,4,8)), + COLOR_TYPE_GRAYA: ("Greyscale+Alpha", (8,16)), + COLOR_TYPE_RGBA: ("RGBA", (8,16)), + } + + def __init__(self, width=0, height=0, bit_depth=8, color_type=2, \ + compression_method=0, filter_method=0, interlace_method=0): + self.width = width + self.height = height + self.bit_depth = bit_depth + self.color_type = color_type + self.compression_method = compression_method + self.filter_method = filter_method + self.interlace_method = interlace_method + super().__init__("IHDR") + + @classmethod + def load(cls, data): + inst = super().load(data) + fields = struct.unpack("!IIBBBBB", inst.data) + inst.width = fields[0] + inst.height = fields[1] + inst.bit_depth = fields[2] # per channel + inst.color_type = fields[3] # see specs + inst.compression_method = fields[4] # always 0(=deflate/inflate) + inst.filter_method = fields[5] # always 0(=adaptive filtering with 5 methods) + inst.interlace_method = fields[6] # 0(=no interlace) or 1(=Adam7 interlace) + return inst + + def dump(self): + self.data = struct.pack("!IIBBBBB", \ + self.width, self.height, self.bit_depth, self.color_type, \ + self.compression_method, self.filter_method, self.interlace_method) + return super().dump() + + def __str__(self): + return "" \ + .format(self.color_types[self.color_type][0], **self.__dict__) + +class IEND(Chunk): + def __init__(self): + super().__init__("IEND") + + def dump(self): + if len(self.data) != 0: + msg = "IEND has data which is not allowed" + raise ValueError(msg) + if self.length != 0: + msg = "IEND data lenght is not 0 which is not allowed" + raise ValueError(msg) + return super().dump() + + def __str__(self): + return "".format(**self.__dict__) + +class DFLChunk(Chunk): + def __init__(self, dict_data=None): + super().__init__("fcWp") + self.dict_data = dict_data + + def setDictData(self, dict_data): + self.dict_data = dict_data + + def getDictData(self): + return self.dict_data + + @classmethod + def load(cls, data): + inst = super().load(data) + inst.dict_data = pickle.loads( inst.data ) + return inst + + def dump(self): + self.data = pickle.dumps (self.dict_data) + return super().dump() + +chunk_map = { + b"IHDR": IHDR, + 
b"fcWp": DFLChunk, + b"IEND": IEND +} + +class DFLPNG(object): + def __init__(self): + self.data = b"" + self.length = 0 + self.chunks = [] + self.dfl_dict = None + + @staticmethod + def load_raw(filename): + try: + with open(filename, "rb") as f: + data = f.read() + except: + raise FileNotFoundError(filename) + + inst = DFLPNG() + inst.data = data + inst.length = len(data) + + if data[0:8] != PNG_HEADER: + msg = "No Valid PNG header" + raise ValueError(msg) + + chunk_start = 8 + while chunk_start < inst.length: + (chunk_length, chunk_name) = struct.unpack("!I4s", data[chunk_start:chunk_start+8]) + chunk_end = chunk_start + chunk_length + 12 + + chunk = chunk_map.get(chunk_name, Chunk).load(data[chunk_start:chunk_end]) + inst.chunks.append(chunk) + chunk_start = chunk_end + + return inst + + @staticmethod + def load(filename): + try: + inst = DFLPNG.load_raw (filename) + inst.dfl_dict = inst.getDFLDictData() + + if inst.dfl_dict is not None: + if 'face_type' not in inst.dfl_dict: + inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) + + if 'fanseg_mask' in inst.dfl_dict: + fanseg_mask = inst.dfl_dict['fanseg_mask'] + if fanseg_mask is not None: + numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) + inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) + + if inst.dfl_dict == None: + return None + + return inst + except Exception as e: + print(e) + return None + + @staticmethod + def embed_data(filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + + if fanseg_mask is not None: + fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) + + ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) + + if ret and len(buf) < 60000: + fanseg_mask = buf + else: + io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) + fanseg_mask = None + + inst = DFLPNG.load_raw (filename) + inst.setDFLDictData ({ + 'face_type': face_type, + 'landmarks': landmarks, + 'ie_polys' : ie_polys.dump() if ie_polys is not None else None, + 'source_filename': source_filename, + 'source_rect': source_rect, + 'source_landmarks': source_landmarks, + 'image_to_face_mat':image_to_face_mat, + 'fanseg_mask' : fanseg_mask, + 'pitch_yaw_roll' : pitch_yaw_roll + }) + + try: + with open(filename, "wb") as f: + f.write ( inst.dump() ) + except: + raise Exception( 'cannot save %s' % (filename) ) + + def embed_and_set(self, filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + if face_type is None: face_type = self.get_face_type() + if landmarks is None: landmarks = self.get_landmarks() + if ie_polys is None: ie_polys = self.get_ie_polys() + if source_filename is None: source_filename = self.get_source_filename() + if source_rect is None: source_rect = self.get_source_rect() + if source_landmarks is None: source_landmarks = self.get_source_landmarks() + if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() + if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() + if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() + DFLPNG.embed_data (filename, face_type=face_type, + landmarks=landmarks, + ie_polys=ie_polys, + source_filename=source_filename, + 
source_rect=source_rect, + source_landmarks=source_landmarks, + image_to_face_mat=image_to_face_mat, + fanseg_mask=fanseg_mask, + pitch_yaw_roll=pitch_yaw_roll) + + def remove_fanseg_mask(self): + self.dfl_dict['fanseg_mask'] = None + + def dump(self): + data = PNG_HEADER + for chunk in self.chunks: + data += chunk.dump() + return data + + def get_shape(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + c = 3 if chunk.color_type == IHDR.COLOR_TYPE_RGB else 4 + w = chunk.width + h = chunk.height + return (h,w,c) + return (0,0,0) + + def get_height(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + return chunk.height + return 0 + + def getDFLDictData(self): + for chunk in self.chunks: + if type(chunk) == DFLChunk: + return chunk.getDictData() + return None + + def setDFLDictData (self, dict_data=None): + for chunk in self.chunks: + if type(chunk) == DFLChunk: + self.chunks.remove(chunk) + break + + if not dict_data is None: + chunk = DFLChunk(dict_data) + self.chunks.insert(-1, chunk) + + def get_face_type(self): return self.dfl_dict['face_type'] + def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) + def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) + def get_source_filename(self): return self.dfl_dict['source_filename'] + def get_source_rect(self): return self.dfl_dict['source_rect'] + def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) + def get_image_to_face_mat(self): + mat = self.dfl_dict.get ('image_to_face_mat', None) + if mat is not None: + return np.array (mat) + return None + def get_fanseg_mask(self): + fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) + if fanseg_mask is not None: + return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] + return None + def get_pitch_yaw_roll(self): + return self.dfl_dict.get ('pitch_yaw_roll', None) + def __str__(self): + return "".format(len(self.chunks), **self.__dict__) diff --git a/utils/Path_utils.py b/utils/Path_utils.py index c9ed448..09258a2 100644 --- a/utils/Path_utils.py +++ b/utils/Path_utils.py @@ -1,83 +1,83 @@ -from pathlib import Path -from os import scandir - -image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] - -def get_image_paths(dir_path, image_extensions=image_extensions): - dir_path = Path (dir_path) - - result = [] - if dir_path.exists(): - for x in list(scandir(str(dir_path))): - if any([x.name.lower().endswith(ext) for ext in image_extensions]): - result.append(x.path) - return result - -def get_image_unique_filestem_paths(dir_path, verbose_print_func=None): - result = get_image_paths(dir_path) - result_dup = set() - - for f in result[:]: - f_stem = Path(f).stem - if f_stem in result_dup: - result.remove(f) - if verbose_print_func is not None: - verbose_print_func ("Duplicate filenames are not allowed, skipping: %s" % Path(f).name ) - continue - result_dup.add(f_stem) - - return result - -def get_file_paths(dir_path): - dir_path = Path (dir_path) - - result = [] - if dir_path.exists(): - return [ x.path for x in list(scandir(str(dir_path))) if x.is_file() ] - return result - -def get_all_dir_names (dir_path): - dir_path = Path (dir_path) - - result = [] - if dir_path.exists(): - return [ x.name for x in list(scandir(str(dir_path))) if x.is_dir() ] - - return result - -def get_all_dir_names_startswith (dir_path, startswith): - dir_path = Path (dir_path) - startswith = startswith.lower() - - result = [] - if dir_path.exists(): - for x in list(scandir(str(dir_path))): - if 
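Chunk.verify_crc above implements the PNG rule that the CRC-32 covers the chunk type plus the payload but not the length field. A compact standalone validator over a whole file, with verify_png_chunks and the path being illustrative:

    import struct
    import zlib

    PNG_HEADER = b"\x89PNG\r\n\x1a\n"

    def verify_png_chunks(path):
        with open(path, "rb") as f:
            data = f.read()
        if data[:8] != PNG_HEADER:
            raise ValueError("No Valid PNG header")
        pos = 8
        while pos < len(data):
            length, name = struct.unpack("!I4s", data[pos:pos + 8])
            payload = data[pos + 8:pos + 8 + length]
            stored_crc, = struct.unpack("!I", data[pos + 8 + length:pos + 12 + length])
            # Per the PNG spec the CRC is computed over type + data only.
            if zlib.crc32(name + payload) != stored_crc:
                raise ValueError("CRC mismatch in %s chunk" % name.decode("ascii"))
            pos += 12 + length
        return True
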
x.name.lower().startswith(startswith): - result.append ( x.name[len(startswith):] ) - return result - -def get_first_file_by_stem (dir_path, stem, exts=None): - dir_path = Path (dir_path) - stem = stem.lower() - - if dir_path.exists(): - for x in list(scandir(str(dir_path))): - if not x.is_file(): - continue - xp = Path(x.path) - if xp.stem.lower() == stem and (exts is None or xp.suffix.lower() in exts): - return xp - - return None - -def move_all_files (src_dir_path, dst_dir_path): - paths = get_file_paths(src_dir_path) - for p in paths: - p = Path(p) - p.rename ( Path(dst_dir_path) / p.name ) - -def delete_all_files (dir_path): - paths = get_file_paths(dir_path) - for p in paths: - p = Path(p) +from pathlib import Path +from os import scandir + +image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + +def get_image_paths(dir_path, image_extensions=image_extensions): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if any([x.name.lower().endswith(ext) for ext in image_extensions]): + result.append(x.path) + return result + +def get_image_unique_filestem_paths(dir_path, verbose_print_func=None): + result = get_image_paths(dir_path) + result_dup = set() + + for f in result[:]: + f_stem = Path(f).stem + if f_stem in result_dup: + result.remove(f) + if verbose_print_func is not None: + verbose_print_func ("Duplicate filenames are not allowed, skipping: %s" % Path(f).name ) + continue + result_dup.add(f_stem) + + return result + +def get_file_paths(dir_path): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + return [ x.path for x in list(scandir(str(dir_path))) if x.is_file() ] + return result + +def get_all_dir_names (dir_path): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + return [ x.name for x in list(scandir(str(dir_path))) if x.is_dir() ] + + return result + +def get_all_dir_names_startswith (dir_path, startswith): + dir_path = Path (dir_path) + startswith = startswith.lower() + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if x.name.lower().startswith(startswith): + result.append ( x.name[len(startswith):] ) + return result + +def get_first_file_by_stem (dir_path, stem, exts=None): + dir_path = Path (dir_path) + stem = stem.lower() + + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if not x.is_file(): + continue + xp = Path(x.path) + if xp.stem.lower() == stem and (exts is None or xp.suffix.lower() in exts): + return xp + + return None + +def move_all_files (src_dir_path, dst_dir_path): + paths = get_file_paths(src_dir_path) + for p in paths: + p = Path(p) + p.rename ( Path(dst_dir_path) / p.name ) + +def delete_all_files (dir_path): + paths = get_file_paths(dir_path) + for p in paths: + p = Path(p) p.unlink() \ No newline at end of file diff --git a/utils/cv2_utils.py b/utils/cv2_utils.py index ff8d82a..63b8ff6 100644 --- a/utils/cv2_utils.py +++ b/utils/cv2_utils.py @@ -1,22 +1,22 @@ -import cv2 -import numpy as np -from pathlib import Path - -#allows to open non-english characters path -def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED): - try: - with open(filename, "rb") as stream: - bytes = bytearray(stream.read()) - numpyarray = np.asarray(bytes, dtype=np.uint8) - return cv2.imdecode(numpyarray, flags) - except: - return None - -def cv2_imwrite(filename, img, *args): - ret, buf = cv2.imencode( Path(filename).suffix, img, *args) - if ret == True: - try: - with open(filename, "wb") as stream: - stream.write( buf ) 
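# Usage sketch for the two helpers above (the sample paths are hypothetical):
# the point of routing file I/O through Python and using cv2.imdecode /
# cv2.imencode is that plain cv2.imread / cv2.imwrite cannot open paths
# containing non-ASCII characters:
#
#     img = cv2_imread(r"D:\кадры\лицо 01.png")      # decoded from an in-memory buffer
#     if img is not None:
#         cv2_imwrite(r"D:\кадры\лицо 01.jpg", img)  # codec picked from Path(filename).suffix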
- except: - pass +import cv2 +import numpy as np +from pathlib import Path + +#allows to open non-english characters path +def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED): + try: + with open(filename, "rb") as stream: + bytes = bytearray(stream.read()) + numpyarray = np.asarray(bytes, dtype=np.uint8) + return cv2.imdecode(numpyarray, flags) + except: + return None + +def cv2_imwrite(filename, img, *args): + ret, buf = cv2.imencode( Path(filename).suffix, img, *args) + if ret == True: + try: + with open(filename, "wb") as stream: + stream.write( buf ) + except: + pass diff --git a/utils/iter_utils.py b/utils/iter_utils.py index eb00c55..7e54f0b 100644 --- a/utils/iter_utils.py +++ b/utils/iter_utils.py @@ -1,70 +1,70 @@ -import threading -import queue as Queue -import multiprocessing -import time - - -class ThisThreadGenerator(object): - def __init__(self, generator_func, user_param=None): - super().__init__() - self.generator_func = generator_func - self.user_param = user_param - self.initialized = False - - def __iter__(self): - return self - - def __next__(self): - if not self.initialized: - self.initialized = True - self.generator_func = self.generator_func(self.user_param) - - return next(self.generator_func) - -class SubprocessGenerator(object): - def __init__(self, generator_func, user_param=None, prefetch=2): - super().__init__() - self.prefetch = prefetch - self.generator_func = generator_func - self.user_param = user_param - self.sc_queue = multiprocessing.Queue() - self.cs_queue = multiprocessing.Queue() - self.p = None - - def process_func(self, user_param): - self.generator_func = self.generator_func(user_param) - while True: - while self.prefetch > -1: - try: - gen_data = next (self.generator_func) - except StopIteration: - self.cs_queue.put (None) - return - self.cs_queue.put (gen_data) - self.prefetch -= 1 - self.sc_queue.get() - self.prefetch += 1 - - def __iter__(self): - return self - - def __getstate__(self): - self_dict = self.__dict__.copy() - del self_dict['p'] - return self_dict - - def __next__(self): - if self.p == None: - user_param = self.user_param - self.user_param = None - self.p = multiprocessing.Process(target=self.process_func, args=(user_param,) ) - self.p.daemon = True - self.p.start() - - gen_data = self.cs_queue.get() - if gen_data is None: - self.p.terminate() - self.p.join() - raise StopIteration() - self.sc_queue.put (1) - return gen_data +import threading +import queue as Queue +import multiprocessing +import time + + +class ThisThreadGenerator(object): + def __init__(self, generator_func, user_param=None): + super().__init__() + self.generator_func = generator_func + self.user_param = user_param + self.initialized = False + + def __iter__(self): + return self + + def __next__(self): + if not self.initialized: + self.initialized = True + self.generator_func = self.generator_func(self.user_param) + + return next(self.generator_func) + +class SubprocessGenerator(object): + def __init__(self, generator_func, user_param=None, prefetch=2): + super().__init__() + self.prefetch = prefetch + self.generator_func = generator_func + self.user_param = user_param + self.sc_queue = multiprocessing.Queue() + self.cs_queue = multiprocessing.Queue() + self.p = None + + def process_func(self, user_param): + self.generator_func = self.generator_func(user_param) + while True: + while self.prefetch > -1: + try: + gen_data = next (self.generator_func) + except StopIteration: + self.cs_queue.put (None) + return + self.cs_queue.put (gen_data) + self.prefetch -= 1 + 
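# Backpressure note: when execution reaches this point the child process has
# pushed prefetch+1 items into cs_queue without acknowledgement, so it parks on
# sc_queue until the parent's __next__ posts a token; each token restores one
# unit of prefetch budget and lets the inner loop emit exactly one more item.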
self.sc_queue.get() + self.prefetch += 1 + + def __iter__(self): + return self + + def __getstate__(self): + self_dict = self.__dict__.copy() + del self_dict['p'] + return self_dict + + def __next__(self): + if self.p == None: + user_param = self.user_param + self.user_param = None + self.p = multiprocessing.Process(target=self.process_func, args=(user_param,) ) + self.p.daemon = True + self.p.start() + + gen_data = self.cs_queue.get() + if gen_data is None: + self.p.terminate() + self.p.join() + raise StopIteration() + self.sc_queue.put (1) + return gen_data diff --git a/utils/os_utils.py b/utils/os_utils.py index ff1bd98..0ce7c75 100644 --- a/utils/os_utils.py +++ b/utils/os_utils.py @@ -1,25 +1,25 @@ -import os -import sys - -if sys.platform[0:3] == 'win': - from ctypes import windll - from ctypes import wintypes - -def set_process_lowest_prio(): - try: - if sys.platform[0:3] == 'win': - GetCurrentProcess = windll.kernel32.GetCurrentProcess - GetCurrentProcess.restype = wintypes.HANDLE - SetPriorityClass = windll.kernel32.SetPriorityClass - SetPriorityClass.argtypes = (wintypes.HANDLE, wintypes.DWORD) - SetPriorityClass ( GetCurrentProcess(), 0x00000040 ) - elif 'darwin' in sys.platform: - os.nice(10) - elif 'linux' in sys.platform: - os.nice(20) - except: - print("Unable to set lowest process priority") - -def set_process_dpi_aware(): - if sys.platform[0:3] == 'win': - windll.user32.SetProcessDPIAware(True) +import os +import sys + +if sys.platform[0:3] == 'win': + from ctypes import windll + from ctypes import wintypes + +def set_process_lowest_prio(): + try: + if sys.platform[0:3] == 'win': + GetCurrentProcess = windll.kernel32.GetCurrentProcess + GetCurrentProcess.restype = wintypes.HANDLE + SetPriorityClass = windll.kernel32.SetPriorityClass + SetPriorityClass.argtypes = (wintypes.HANDLE, wintypes.DWORD) + SetPriorityClass ( GetCurrentProcess(), 0x00000040 ) + elif 'darwin' in sys.platform: + os.nice(10) + elif 'linux' in sys.platform: + os.nice(20) + except: + print("Unable to set lowest process priority") + +def set_process_dpi_aware(): + if sys.platform[0:3] == 'win': + windll.user32.SetProcessDPIAware(True) diff --git a/utils/pickle_utils.py b/utils/pickle_utils.py index 37c4c72..1f5f9dd 100644 --- a/utils/pickle_utils.py +++ b/utils/pickle_utils.py @@ -1,9 +1,9 @@ -class AntiPickler(): - def __init__(self, obj): - self.obj = obj - - def __getstate__(self): - return dict() - - def __setstate__(self, d): +class AntiPickler(): + def __init__(self, obj): + self.obj = obj + + def __getstate__(self): + return dict() + + def __setstate__(self, d): self.__dict__.update(d) \ No newline at end of file diff --git a/utils/random_utils.py b/utils/random_utils.py index 7b3af6e..1891a67 100644 --- a/utils/random_utils.py +++ b/utils/random_utils.py @@ -1,14 +1,14 @@ -import numpy as np - -def random_normal( size=(1,), trunc_val = 2.5 ): - len = np.array(size).prod() - result = np.empty ( (len,) , dtype=np.float32) - - for i in range (len): - while True: - x = np.random.normal() - if x >= -trunc_val and x <= trunc_val: - break - result[i] = (x / trunc_val) - - return result.reshape ( size ) +import numpy as np + +def random_normal( size=(1,), trunc_val = 2.5 ): + len = np.array(size).prod() + result = np.empty ( (len,) , dtype=np.float32) + + for i in range (len): + while True: + x = np.random.normal() + if x >= -trunc_val and x <= trunc_val: + break + result[i] = (x / trunc_val) + + return result.reshape ( size ) diff --git a/utils/std_utils.py b/utils/std_utils.py index 2f23be9..3e977fa 
100644 --- a/utils/std_utils.py +++ b/utils/std_utils.py @@ -1,36 +1,36 @@ -import os -import sys - -class suppress_stdout_stderr(object): - def __enter__(self): - self.outnull_file = open(os.devnull, 'w') - self.errnull_file = open(os.devnull, 'w') - - self.old_stdout_fileno_undup = sys.stdout.fileno() - self.old_stderr_fileno_undup = sys.stderr.fileno() - - self.old_stdout_fileno = os.dup ( sys.stdout.fileno() ) - self.old_stderr_fileno = os.dup ( sys.stderr.fileno() ) - - self.old_stdout = sys.stdout - self.old_stderr = sys.stderr - - os.dup2 ( self.outnull_file.fileno(), self.old_stdout_fileno_undup ) - os.dup2 ( self.errnull_file.fileno(), self.old_stderr_fileno_undup ) - - sys.stdout = self.outnull_file - sys.stderr = self.errnull_file - return self - - def __exit__(self, *_): - sys.stdout = self.old_stdout - sys.stderr = self.old_stderr - - os.dup2 ( self.old_stdout_fileno, self.old_stdout_fileno_undup ) - os.dup2 ( self.old_stderr_fileno, self.old_stderr_fileno_undup ) - - os.close ( self.old_stdout_fileno ) - os.close ( self.old_stderr_fileno ) - - self.outnull_file.close() - self.errnull_file.close() +import os +import sys + +class suppress_stdout_stderr(object): + def __enter__(self): + self.outnull_file = open(os.devnull, 'w') + self.errnull_file = open(os.devnull, 'w') + + self.old_stdout_fileno_undup = sys.stdout.fileno() + self.old_stderr_fileno_undup = sys.stderr.fileno() + + self.old_stdout_fileno = os.dup ( sys.stdout.fileno() ) + self.old_stderr_fileno = os.dup ( sys.stderr.fileno() ) + + self.old_stdout = sys.stdout + self.old_stderr = sys.stderr + + os.dup2 ( self.outnull_file.fileno(), self.old_stdout_fileno_undup ) + os.dup2 ( self.errnull_file.fileno(), self.old_stderr_fileno_undup ) + + sys.stdout = self.outnull_file + sys.stderr = self.errnull_file + return self + + def __exit__(self, *_): + sys.stdout = self.old_stdout + sys.stderr = self.old_stderr + + os.dup2 ( self.old_stdout_fileno, self.old_stdout_fileno_undup ) + os.dup2 ( self.old_stderr_fileno, self.old_stderr_fileno_undup ) + + os.close ( self.old_stdout_fileno ) + os.close ( self.old_stderr_fileno ) + + self.outnull_file.close() + self.errnull_file.close() diff --git a/utils/struct_utils.py b/utils/struct_utils.py index cc63559..ff79a39 100644 --- a/utils/struct_utils.py +++ b/utils/struct_utils.py @@ -1,5 +1,5 @@ -import struct - -def struct_unpack(data, counter, fmt): - fmt_size = struct.calcsize(fmt) - return (counter+fmt_size,) + struct.unpack (fmt, data[counter:counter+fmt_size]) +import struct + +def struct_unpack(data, counter, fmt): + fmt_size = struct.calcsize(fmt) + return (counter+fmt_size,) + struct.unpack (fmt, data[counter:counter+fmt_size]) From 353f3813e73589b13d5977ab606aec9a8aaa9dda Mon Sep 17 00:00:00 2001 From: seranus <=> Date: Mon, 29 Jul 2019 13:50:20 +0200 Subject: [PATCH 5/9] merge --- main.py | 2 + models/ModelBase.py | 618 ------------------------------------------ requirements-cuda.txt | 2 +- 3 files changed, 3 insertions(+), 619 deletions(-) diff --git a/main.py b/main.py index ebce793..6f80fb1 100644 --- a/main.py +++ b/main.py @@ -7,6 +7,8 @@ from utils import Path_utils from utils import os_utils from pathlib import Path +train_args = r'python3 main.py train --training-data-src-dir /media/user/5246EBF746EBD9AD/dfl/DFL/workspace/data_src/aligned/ --training-data-dst-dir /media/user/5246EBF746EBD9AD/dfl/DFL/workspace/data_dst/aligned/ --model-dir /media/user/5246EBF746EBD9AD/generic-fs/128h-sae-liaf/ --model SAE' + if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and 
sys.version_info[1] < 6): raise Exception("This program requires at least Python 3.6") diff --git a/models/ModelBase.py b/models/ModelBase.py index ff1d751..b823a74 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -1,620 +1,3 @@ -<<<<<<< HEAD -import colorsys -import inspect -import json -import os -import pickle -import shutil -import time -from pathlib import Path - -import cv2 -import numpy as np - -import imagelib -from interact import interact as io -from nnlib import nnlib -from samplelib import SampleGeneratorBase -from utils import Path_utils, std_utils -from utils.cv2_utils import * - -''' -You can implement your own model. Check examples. -''' -class ModelBase(object): - - - def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, pretraining_data_path=None, debug = False, device_args = None, - ask_enable_autobackup=True, - ask_write_preview_history=True, - ask_target_iter=True, - ask_batch_size=True, - ask_sort_by_yaw=True, - ask_random_flip=True, - ask_src_scale_mod=True): - - device_args['force_gpu_idx'] = device_args.get('force_gpu_idx',-1) - device_args['cpu_only'] = device_args.get('cpu_only',False) - - if device_args['force_gpu_idx'] == -1 and not device_args['cpu_only']: - idxs_names_list = nnlib.device.getValidDevicesIdxsWithNamesList() - if len(idxs_names_list) > 1: - io.log_info ("You have multi GPUs in a system: ") - for idx, name in idxs_names_list: - io.log_info ("[%d] : %s" % (idx, name) ) - - device_args['force_gpu_idx'] = io.input_int("Which GPU idx to choose? ( skip: best GPU ) : ", -1, [ x[0] for x in idxs_names_list] ) - self.device_args = device_args - - self.device_config = nnlib.DeviceConfig(allow_growth=False, **self.device_args) - - io.log_info ("Loading model...") - - self.model_path = model_path - self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) - - self.training_data_src_path = training_data_src_path - self.training_data_dst_path = training_data_dst_path - self.pretraining_data_path = pretraining_data_path - - self.src_images_paths = None - self.dst_images_paths = None - self.src_yaw_images_paths = None - self.dst_yaw_images_paths = None - self.src_data_generator = None - self.dst_data_generator = None - self.debug = debug - self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None) - - self.iter = 0 - self.options = {} - self.loss_history = [] - self.sample_for_preview = None - - model_data = {} - if self.model_data_path.exists(): - model_data = pickle.loads ( self.model_data_path.read_bytes() ) - self.iter = max( model_data.get('iter',0), model_data.get('epoch',0) ) - if 'epoch' in self.options: - self.options.pop('epoch') - if self.iter != 0: - self.options = model_data['options'] - self.loss_history = model_data.get('loss_history', []) - self.sample_for_preview = model_data.get('sample_for_preview', None) - - ask_override = self.is_training_mode and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) - - yn_str = {True:'y',False:'n'} - - if self.iter == 0: - io.log_info ("\nModel first run. Enter model options as default for each run.") - - if ask_enable_autobackup and (self.iter == 0 or ask_override): - default_autobackup = False if self.iter == 0 else self.options.get('autobackup',False) - self.options['autobackup'] = io.input_bool("Enable autobackup? 
(y/n ?:help skip:%s) : " % (yn_str[default_autobackup]) , default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") - else: - self.options['autobackup'] = self.options.get('autobackup', False) - - if ask_write_preview_history and (self.iter == 0 or ask_override): - default_write_preview_history = False if self.iter == 0 else self.options.get('write_preview_history',False) - self.options['write_preview_history'] = io.input_bool("Write preview history? (y/n ?:help skip:%s) : " % (yn_str[default_write_preview_history]) , default_write_preview_history, help_message="Preview history will be writed to _history folder.") - else: - self.options['write_preview_history'] = self.options.get('write_preview_history', False) - - if (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_support_windows(): - choose_preview_history = io.input_bool("Choose image for the preview history? (y/n skip:%s) : " % (yn_str[False]) , False) - else: - choose_preview_history = False - - if ask_target_iter: - if (self.iter == 0 or ask_override): - self.options['target_iter'] = max(0, io.input_int("Target iteration (skip:unlimited/default) : ", 0)) - else: - self.options['target_iter'] = max(model_data.get('target_iter',0), self.options.get('target_epoch',0)) - if 'target_epoch' in self.options: - self.options.pop('target_epoch') - - if ask_batch_size and (self.iter == 0 or ask_override): - default_batch_size = 0 if self.iter == 0 else self.options.get('batch_size',0) - self.options['batch_size'] = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) - else: - self.options['batch_size'] = self.options.get('batch_size', 0) - - if ask_sort_by_yaw: - if (self.iter == 0 or ask_override): - default_sort_by_yaw = self.options.get('sort_by_yaw', False) - self.options['sort_by_yaw'] = io.input_bool("Feed faces to network sorted by yaw? (y/n ?:help skip:%s) : " % (yn_str[default_sort_by_yaw]), default_sort_by_yaw, help_message="NN will not learn src face directions that don't match dst face directions. Do not use if the dst face has hair that covers the jaw." ) - else: - self.options['sort_by_yaw'] = self.options.get('sort_by_yaw', False) - - if ask_random_flip: - if (self.iter == 0 or ask_override): - self.options['random_flip'] = io.input_bool("Flip faces randomly? 
(y/n ?:help skip:y) : ", True, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") - else: - self.options['random_flip'] = self.options.get('random_flip', True) - - if ask_src_scale_mod: - if (self.iter == 0): - self.options['src_scale_mod'] = np.clip( io.input_int("Src face scale modifier % ( -30...30, ?:help skip:0) : ", 0, help_message="If src face shape is wider than dst, try to decrease this value to get a better result."), -30, 30) - else: - self.options['src_scale_mod'] = self.options.get('src_scale_mod', 0) - - self.autobackup = self.options.get('autobackup', False) - if not self.autobackup and 'autobackup' in self.options: - self.options.pop('autobackup') - - self.write_preview_history = self.options.get('write_preview_history', False) - if not self.write_preview_history and 'write_preview_history' in self.options: - self.options.pop('write_preview_history') - - self.target_iter = self.options.get('target_iter',0) - if self.target_iter == 0 and 'target_iter' in self.options: - self.options.pop('target_iter') - - self.batch_size = self.options.get('batch_size',0) - self.sort_by_yaw = self.options.get('sort_by_yaw',False) - self.random_flip = self.options.get('random_flip',True) - - self.src_scale_mod = self.options.get('src_scale_mod',0) - if self.src_scale_mod == 0 and 'src_scale_mod' in self.options: - self.options.pop('src_scale_mod') - - self.onInitializeOptions(self.iter == 0, ask_override) - - nnlib.import_all(self.device_config) - self.keras = nnlib.keras - self.K = nnlib.keras.backend - - self.onInitialize() - - self.options['batch_size'] = self.batch_size - - if self.debug or self.batch_size == 0: - self.batch_size = 1 - - if self.is_training_mode: - if self.device_args['force_gpu_idx'] == -1: - self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) - self.autobackups_path = self.model_path / ( '%s_autobackups' % (self.get_model_name()) ) - else: - self.preview_history_path = self.model_path / ( '%d_%s_history' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) - self.autobackups_path = self.model_path / ( '%d_%s_autobackups' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) - - if self.autobackup: - self.autobackup_current_hour = time.localtime().tm_hour - - if not self.autobackups_path.exists(): - self.autobackups_path.mkdir(exist_ok=True) - - if self.write_preview_history or io.is_colab(): - if not self.preview_history_path.exists(): - self.preview_history_path.mkdir(exist_ok=True) - else: - if self.iter == 0: - for filename in Path_utils.get_image_paths(self.preview_history_path): - Path(filename).unlink() - - if self.generator_list is None: - raise ValueError( 'You didnt set_training_data_generators()') - else: - for i, generator in enumerate(self.generator_list): - if not isinstance(generator, SampleGeneratorBase): - raise ValueError('training data generator is not subclass of SampleGeneratorBase') - - if self.sample_for_preview is None or choose_preview_history: - if choose_preview_history and io.is_support_windows(): - wnd_name = "[p] - next. [enter] - confirm." 
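# The chooser below cycles candidate previews: 'p' generates and displays the
# next random sample, Enter confirms the currently shown one as
# sample_for_preview.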
- io.named_window(wnd_name) - io.capture_keys(wnd_name) - choosed = False - while not choosed: - self.sample_for_preview = self.generate_next_sample() - preview = self.get_static_preview() - io.show_image( wnd_name, (preview*255).astype(np.uint8) ) - - while True: - key_events = io.get_key_events(wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - if key == ord('\n') or key == ord('\r'): - choosed = True - break - elif key == ord('p'): - break - - try: - io.process_messages(0.1) - except KeyboardInterrupt: - choosed = True - - io.destroy_window(wnd_name) - else: - self.sample_for_preview = self.generate_next_sample() - self.last_sample = self.sample_for_preview - model_summary_text = [] - - model_summary_text += ["===== Model summary ====="] - model_summary_text += ["== Model name: " + self.get_model_name()] - model_summary_text += ["=="] - model_summary_text += ["== Current iteration: " + str(self.iter)] - model_summary_text += ["=="] - model_summary_text += ["== Model options:"] - for key in self.options.keys(): - model_summary_text += ["== |== %s : %s" % (key, self.options[key])] - - if self.device_config.multi_gpu: - model_summary_text += ["== |== multi_gpu : True "] - - model_summary_text += ["== Running on:"] - if self.device_config.cpu_only: - model_summary_text += ["== |== [CPU]"] - else: - for idx in self.device_config.gpu_idxs: - model_summary_text += ["== |== [%d : %s]" % (idx, nnlib.device.getDeviceName(idx))] - - if not self.device_config.cpu_only and self.device_config.gpu_vram_gb[0] == 2: - model_summary_text += ["=="] - model_summary_text += ["== WARNING: You are using 2GB GPU. Result quality may be significantly decreased."] - model_summary_text += ["== If training does not start, close all programs and try again."] - model_summary_text += ["== Also you can disable Windows Aero Desktop to get extra free VRAM."] - model_summary_text += ["=="] - - model_summary_text += ["========================="] - model_summary_text = "\r\n".join (model_summary_text) - self.model_summary_text = model_summary_text - io.log_info(model_summary_text) - - #overridable - def onInitializeOptions(self, is_first_run, ask_override): - pass - - #overridable - def onInitialize(self): - ''' - initialize your keras models - - store and retrieve your model options in self.options[''] - - check example - ''' - pass - - #overridable - def onSave(self): - #save your keras models here - pass - - #overridable - def onTrainOneIter(self, sample, generator_list): - #train your keras models here - - #return array of losses - return ( ('loss_src', 0), ('loss_dst', 0) ) - - #overridable - def onGetPreview(self, sample): - #you can return multiple previews - #return [ ('preview_name',preview_rgb), ... ] - return [] - - #overridable if you want model name differs from folder name - def get_model_name(self): - return Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] - - #overridable , return [ [model, filename],... 
] list - def get_model_filename_list(self): - return [] - - #overridable - def get_converter(self): - raise NotImplementedError - #return existing or your own converter which derived from base - - def get_target_iter(self): - return self.target_iter - - def is_reached_iter_goal(self): - return self.target_iter != 0 and self.iter >= self.target_iter - - #multi gpu in keras actually is fake and doesn't work for training https://github.com/keras-team/keras/issues/11976 - #def to_multi_gpu_model_if_possible (self, models_list): - # if len(self.device_config.gpu_idxs) > 1: - # #make batch_size to divide on GPU count without remainder - # self.batch_size = int( self.batch_size / len(self.device_config.gpu_idxs) ) - # if self.batch_size == 0: - # self.batch_size = 1 - # self.batch_size *= len(self.device_config.gpu_idxs) - # - # result = [] - # for model in models_list: - # for i in range( len(model.output_names) ): - # model.output_names = 'output_%d' % (i) - # result += [ nnlib.keras.utils.multi_gpu_model( model, self.device_config.gpu_idxs ) ] - # - # return result - # else: - # return models_list - - def get_previews(self): - return self.onGetPreview ( self.last_sample ) - - def get_static_preview(self): - return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr - - def save(self): - summary_path = self.get_strpath_storage_for_file('summary.txt') - Path( summary_path ).write_text(self.model_summary_text) - self.onSave() - - model_data = { - 'iter': self.iter, - 'options': self.options, - 'loss_history': self.loss_history, - 'sample_for_preview' : self.sample_for_preview - } - self.model_data_path.write_bytes( pickle.dumps(model_data) ) - - bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] - bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] - - if self.autobackup: - current_hour = time.localtime().tm_hour - if self.autobackup_current_hour != current_hour: - self.autobackup_current_hour = current_hour - - for i in range(15,0,-1): - idx_str = '%.2d' % i - next_idx_str = '%.2d' % (i+1) - - idx_backup_path = self.autobackups_path / idx_str - next_idx_packup_path = self.autobackups_path / next_idx_str - - if idx_backup_path.exists(): - if i == 15: - Path_utils.delete_all_files(idx_backup_path) - else: - next_idx_packup_path.mkdir(exist_ok=True) - Path_utils.move_all_files (idx_backup_path, next_idx_packup_path) - - if i == 1: - idx_backup_path.mkdir(exist_ok=True) - for filename in bckp_filename_list: - shutil.copy ( str(filename), str(idx_backup_path / Path(filename).name) ) - - previews = self.get_previews() - plist = [] - for i in range(len(previews)): - name, bgr = previews[i] - plist += [ (bgr, idx_backup_path / ( ('preview_%s.jpg') % (name)) ) ] - - for preview, filepath in plist: - preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) - img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) - cv2_imwrite (filepath, img ) - - def load_weights_safe(self, model_filename_list, optimizer_filename_list=[]): - for model, filename in model_filename_list: - filename = self.get_strpath_storage_for_file(filename) - if Path(filename).exists(): - model.load_weights(filename) - - if len(optimizer_filename_list) != 0: - opt_filename = self.get_strpath_storage_for_file('opt.h5') - if Path(opt_filename).exists(): - try: - with open(opt_filename, "rb") as f: - d = pickle.loads(f.read()) - - for x in 
optimizer_filename_list: - opt, filename = x - if filename in d: - weights = d[filename].get('weights', None) - if weights: - opt.set_weights(weights) - print("set ok") - except Exception as e: - print ("Unable to load ", opt_filename) - - - def save_weights_safe(self, model_filename_list): - for model, filename in model_filename_list: - filename = self.get_strpath_storage_for_file(filename) - model.save_weights( filename + '.tmp' ) - - rename_list = model_filename_list - - """ - #unused - , optimizer_filename_list=[] - if len(optimizer_filename_list) != 0: - opt_filename = self.get_strpath_storage_for_file('opt.h5') - - try: - d = {} - for opt, filename in optimizer_filename_list: - fd = {} - symbolic_weights = getattr(opt, 'weights') - if symbolic_weights: - fd['weights'] = self.K.batch_get_value(symbolic_weights) - - d[filename] = fd - - with open(opt_filename+'.tmp', 'wb') as f: - f.write( pickle.dumps(d) ) - - rename_list += [('', 'opt.h5')] - except Exception as e: - print ("Unable to save ", opt_filename) - """ - - for _, filename in rename_list: - filename = self.get_strpath_storage_for_file(filename) - source_filename = Path(filename+'.tmp') - if source_filename.exists(): - target_filename = Path(filename) - if target_filename.exists(): - target_filename.unlink() - source_filename.rename ( str(target_filename) ) - - def debug_one_iter(self): - images = [] - for generator in self.generator_list: - for i,batch in enumerate(next(generator)): - if len(batch.shape) == 4: - images.append( batch[0] ) - - return imagelib.equalize_and_stack_square (images) - - def generate_next_sample(self): - return [next(generator) for generator in self.generator_list] - - def train_one_iter(self): - sample = self.generate_next_sample() - iter_time = time.time() - losses = self.onTrainOneIter(sample, self.generator_list) - iter_time = time.time() - iter_time - self.last_sample = sample - - self.loss_history.append ( [float(loss[1]) for loss in losses] ) - - if self.iter % 10 == 0: - plist = [] - - if io.is_colab(): - previews = self.get_previews() - for i in range(len(previews)): - name, bgr = previews[i] - plist += [ (bgr, self.get_strpath_storage_for_file('preview_%s.jpg' % (name) ) ) ] - - if self.write_preview_history: - plist += [ (self.get_static_preview(), str (self.preview_history_path / ('%.6d.jpg' % (self.iter))) ) ] - - for preview, filepath in plist: - preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) - img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) - cv2_imwrite (filepath, img ) - - - self.iter += 1 - - return self.iter, iter_time - - def pass_one_iter(self): - self.last_sample = self.generate_next_sample() - - def finalize(self): - nnlib.finalize_all() - - def is_first_run(self): - return self.iter == 0 - - def is_debug(self): - return self.debug - - def set_batch_size(self, batch_size): - self.batch_size = batch_size - - def get_batch_size(self): - return self.batch_size - - def get_iter(self): - return self.iter - - def get_loss_history(self): - return self.loss_history - - def set_training_data_generators (self, generator_list): - self.generator_list = generator_list - - def get_training_data_generators (self): - return self.generator_list - - def get_model_root_path(self): - return self.model_path - - def get_strpath_storage_for_file(self, filename): - if self.device_args['force_gpu_idx'] == -1: - return str( self.model_path / ( self.get_model_name() + '_' + filename) ) - else: - return str( 
self.model_path / ( str(self.device_args['force_gpu_idx']) + '_' + self.get_model_name() + '_' + filename) ) - - def set_vram_batch_requirements (self, d): - #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} - keys = [x for x in d.keys()] - - if self.device_config.cpu_only: - if self.batch_size == 0: - self.batch_size = 2 - else: - if self.batch_size == 0: - for x in keys: - if self.device_config.gpu_vram_gb[0] <= x: - self.batch_size = d[x] - break - - if self.batch_size == 0: - self.batch_size = d[ keys[-1] ] - - @staticmethod - def get_loss_history_preview(loss_history, iter, w, c): - loss_history = np.array (loss_history.copy()) - - lh_height = 100 - lh_img = np.ones ( (lh_height,w,c) ) * 0.1 - - if len(loss_history) != 0: - loss_count = len(loss_history[0]) - lh_len = len(loss_history) - - l_per_col = lh_len / w - plist_max = [ [ max (0.0, loss_history[int(col*l_per_col)][p], - *[ loss_history[i_ab][p] - for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) - ] - ) - for p in range(loss_count) - ] - for col in range(w) - ] - - plist_min = [ [ min (plist_max[col][p], loss_history[int(col*l_per_col)][p], - *[ loss_history[i_ab][p] - for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) - ] - ) - for p in range(loss_count) - ] - for col in range(w) - ] - - plist_abs_max = np.mean(loss_history[ len(loss_history) // 5 : ]) * 2 - - for col in range(0, w): - for p in range(0,loss_count): - point_color = [1.0]*c - point_color[0:3] = colorsys.hsv_to_rgb ( p * (1.0/loss_count), 1.0, 1.0 ) - - ph_max = int ( (plist_max[col][p] / plist_abs_max) * (lh_height-1) ) - ph_max = np.clip( ph_max, 0, lh_height-1 ) - - ph_min = int ( (plist_min[col][p] / plist_abs_max) * (lh_height-1) ) - ph_min = np.clip( ph_min, 0, lh_height-1 ) - - for ph in range(ph_min, ph_max+1): - lh_img[ (lh_height-ph-1), col ] = point_color - - lh_lines = 5 - lh_line_height = (lh_height-1)/lh_lines - for i in range(0,lh_lines+1): - lh_img[ int(i*lh_line_height), : ] = (0.8,)*c - - last_line_t = int((lh_lines-1)*lh_line_height) - last_line_b = int(lh_lines*lh_line_height) - - lh_text = 'Iter: %d' % (iter) if iter != 0 else '' - - lh_img[last_line_t:last_line_b, 0:w] += imagelib.get_text_image ( (last_line_b-last_line_t,w,c), lh_text, color=[0.8]*c ) - return lh_img -======= import colorsys import inspect import json @@ -1232,4 +615,3 @@ class ModelBase(object): lh_img[last_line_t:last_line_b, 0:w] += imagelib.get_text_image ( (last_line_b-last_line_t,w,c), lh_text, color=[0.8]*c ) return lh_img ->>>>>>> upstream/master diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 06b8d42..2017ecf 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -2,7 +2,7 @@ numpy==1.16.3 h5py==2.9.0 Keras==2.2.4 opencv-python==4.0.0.21 -tensorflow-gpu==1.12.0 +tensorflow-gpu==1.14.0 plaidml==0.6.0 plaidml-keras==0.5.0 scikit-image From 5983acac603cff4ecb006e44166b77099cbb44af Mon Sep 17 00:00:00 2001 From: seranus <=> Date: Tue, 30 Jul 2019 13:22:12 +0200 Subject: [PATCH 6/9] dilutation jawline cut --- converters/ConverterMasked.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/converters/ConverterMasked.py b/converters/ConverterMasked.py index cfa07fa..4026ecd 100644 --- a/converters/ConverterMasked.py +++ b/converters/ConverterMasked.py @@ -12,6 +12,8 @@ from utils.pickle_utils import AntiPickler from .Converter import Converter +import math + ''' default_mode = {1:'overlay', @@ -251,8 +253,18 @@ class ConverterMasked(Converter): if ero > 0: img_face_mask_aaa = 
cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) elif ero < 0: + inverted_old_mask = 1 - img_face_mask_aaa + debugs += [inverted_old_mask.copy()] + left_jaw_landmark = img_face_landmarks[0] + right_jaw_landmark = img_face_landmarks[16] + + inverted_old_mask[0: int(left_jaw_landmark[1]), 0: int(right_jaw_landmark[0])] = 0 + inverted_old_mask[0: int(right_jaw_landmark[1]), int(left_jaw_landmark[0]): -1] = 0 img_face_mask_aaa = cv2.dilate(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) + # img_face_mask_aaa = img_face_mask_aaa - inverted_old_mask + # img_face_mask_aaa = np.clip(img_face_mask_aaa, 0, 1) + img_mask_blurry_aaa = img_face_mask_aaa if self.clip_hborder_mask_per > 0: #clip hborder before blur @@ -278,6 +290,10 @@ class ConverterMasked(Converter): img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) ) img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) + + if ero < 0: + img_mask_blurry_aaa = img_mask_blurry_aaa - inverted_old_mask + img_mask_blurry_aaa = np.clip(img_mask_blurry_aaa, 0, 1) face_mask_blurry_aaa = cv2.warpAffine( img_mask_blurry_aaa, face_mat, (output_size, output_size) ) if debug: From 70cf79f3780322e97f532f70e1b3107416649365 Mon Sep 17 00:00:00 2001 From: seranus <=> Date: Tue, 30 Jul 2019 13:27:45 +0200 Subject: [PATCH 7/9] removed debug --- converters/ConverterMasked.py | 1 - 1 file changed, 1 deletion(-) diff --git a/converters/ConverterMasked.py b/converters/ConverterMasked.py index 4026ecd..3496622 100644 --- a/converters/ConverterMasked.py +++ b/converters/ConverterMasked.py @@ -254,7 +254,6 @@ class ConverterMasked(Converter): img_face_mask_aaa = cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) elif ero < 0: inverted_old_mask = 1 - img_face_mask_aaa - debugs += [inverted_old_mask.copy()] left_jaw_landmark = img_face_landmarks[0] right_jaw_landmark = img_face_landmarks[16] From 8eeb7632a0d913eda672935587a829a553cd70c2 Mon Sep 17 00:00:00 2001 From: seranus <=> Date: Wed, 31 Jul 2019 17:33:48 +0200 Subject: [PATCH 8/9] caf mask update --- .gitignore | 4 +++- converters/ConverterMasked.py | 20 +++----------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 55123e8..458a632 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ !*.jpg !requirements* !Dockerfile* -!*.sh \ No newline at end of file +!*.sh +train.py +convert.py diff --git a/converters/ConverterMasked.py b/converters/ConverterMasked.py index 3496622..4f6f372 100644 --- a/converters/ConverterMasked.py +++ b/converters/ConverterMasked.py @@ -12,8 +12,6 @@ from utils.pickle_utils import AntiPickler from .Converter import Converter -import math - ''' default_mode = {1:'overlay', @@ -253,24 +251,16 @@ class ConverterMasked(Converter): if ero > 0: img_face_mask_aaa = cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) elif ero < 0: - inverted_old_mask = 1 - img_face_mask_aaa - left_jaw_landmark = img_face_landmarks[0] - right_jaw_landmark = img_face_landmarks[16] - - inverted_old_mask[0: int(left_jaw_landmark[1]), 0: int(right_jaw_landmark[0])] = 0 - inverted_old_mask[0: int(right_jaw_landmark[1]), int(left_jaw_landmark[0]): -1] = 0 img_face_mask_aaa = cv2.dilate(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) - # img_face_mask_aaa = img_face_mask_aaa - inverted_old_mask - # img_face_mask_aaa 
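# With the jawline cut from patch 6 reverted here, the erosion modifier reduces
# to plain morphology: positive `ero` shrinks the mask, negative `ero` grows
# it, both with an elliptical kernel. A minimal sketch on a synthetic mask
# (illustrative only, not part of the patch):
#
#     import cv2
#     import numpy as np
#
#     mask = np.zeros((256, 256), dtype=np.float32)
#     cv2.circle(mask, (128, 128), 80, 1.0, -1)      # filled stand-in face mask
#     ero = -19                                      # negative value -> dilate
#     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (abs(ero), abs(ero)))
#     mask = (cv2.erode(mask, kernel, iterations=1) if ero > 0
#             else cv2.dilate(mask, kernel, iterations=1))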
= np.clip(img_face_mask_aaa, 0, 1) - img_mask_blurry_aaa = img_face_mask_aaa if self.clip_hborder_mask_per > 0: #clip hborder before blur prd_hborder_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=np.float32) prd_border_size = int ( prd_hborder_rect_mask_a.shape[1] * self.clip_hborder_mask_per ) prd_hborder_rect_mask_a[:,0:prd_border_size,:] = 0 - prd_hborder_rect_mask_a[:,-prd_border_size:,:] = 0 + prd_hborder_rect_mask_a[:,-prd_border_size:,:] = 0 + prd_hborder_rect_mask_a[-prd_border_size:,:,:] = 0 prd_hborder_rect_mask_a = np.expand_dims(cv2.blur(prd_hborder_rect_mask_a, (prd_border_size, prd_border_size) ),-1) img_prd_hborder_rect_mask_a = cv2.warpAffine( prd_hborder_rect_mask_a, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) @@ -289,10 +279,6 @@ class ConverterMasked(Converter): img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) ) img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) - - if ero < 0: - img_mask_blurry_aaa = img_mask_blurry_aaa - inverted_old_mask - img_mask_blurry_aaa = np.clip(img_mask_blurry_aaa, 0, 1) face_mask_blurry_aaa = cv2.warpAffine( img_mask_blurry_aaa, face_mat, (output_size, output_size) ) if debug: @@ -447,4 +433,4 @@ class ConverterMasked(Converter): if debug: debugs += [out_img.copy()] - return debugs if debug else out_img + return debugs if debug else out_img \ No newline at end of file From 035a1c6ab4e30b307b89425408d741e434b43625 Mon Sep 17 00:00:00 2001 From: Jeremy Hummel Date: Wed, 14 Aug 2019 14:59:37 -0700 Subject: [PATCH 9/9] fix line endings --- .github/ISSUE_TEMPLATE.md | 36 +- .gitignore | 20 +- CODEGUIDELINES | 8 +- LICENSE | 1348 ++++---- README.md | 98 +- converters/Converter.py | 100 +- converters/ConverterAvatar.py | 140 +- converters/ConverterImage.py | 100 +- converters/ConverterMasked.py | 870 +++--- converters/__init__.py | 8 +- doc/doc_build_and_repository_info.md | 8 +- doc/doc_prebuilt_windows_app.md | 48 +- doc/doc_ready_to_work_facesets.md | 22 +- doc/gallery/doc_gallery.md | 4 +- facelib/DLIBExtractor.py | 80 +- facelib/FANSegmentator.py | 278 +- facelib/FaceType.py | 66 +- facelib/LandmarksExtractor.py | 240 +- facelib/LandmarksProcessor.py | 772 ++--- facelib/MTCExtractor.py | 700 ++--- facelib/PoseEstimator.py | 626 ++-- facelib/S3FDExtractor.py | 196 +- facelib/__init__.py | 12 +- imagelib/DCSCN.py | 328 +- imagelib/IEPolys.py | 206 +- imagelib/__init__.py | 52 +- imagelib/blur.py | 284 +- imagelib/color_transfer.py | 382 +-- imagelib/common.py | 40 +- imagelib/draw.py | 26 +- imagelib/equalize_and_stack_square.py | 88 +- imagelib/estimate_sharpness.py | 554 ++-- imagelib/morph.py | 72 +- imagelib/reduce_colors.py | 28 +- imagelib/text.py | 128 +- imagelib/warp.py | 100 +- interact/__init__.py | 2 +- interact/interact.py | 808 ++--- joblib/SubprocessFunctionCaller.py | 84 +- joblib/SubprocessorBase.py | 576 ++-- joblib/__init__.py | 4 +- localization/__init__.py | 4 +- localization/localization.py | 60 +- main.py | 556 ++-- mainscripts/Converter.py | 782 ++--- mainscripts/Extractor.py | 1746 +++++------ mainscripts/MaskEditorTool.py | 1112 +++---- mainscripts/Sorter.py | 1606 +++++----- mainscripts/Trainer.py | 648 ++-- mainscripts/Util.py | 312 +- mainscripts/VideoEd.py | 398 +-- mathlib/__init__.py | 50 +- mathlib/umeyama.py | 142 +- models/Model_DEV_FANSEG/Model.py | 204 +- models/Model_DEV_FANSEG/__init__.py | 2 +- models/Model_DEV_POSEEST/Model.py | 242 +- models/Model_DEV_POSEEST/__init__.py | 2 +- models/Model_DF/Model.py | 
342 +- models/Model_DF/__init__.py | 2 +- models/Model_H128/Model.py | 410 +-- models/Model_H128/__init__.py | 2 +- models/Model_H64/Model.py | 404 +-- models/Model_H64/__init__.py | 2 +- models/Model_LIAEF128/Model.py | 360 +-- models/Model_LIAEF128/__init__.py | 2 +- models/Model_RecycleGAN/Model.py | 966 +++--- models/Model_SAE/Model.py | 1400 ++++----- models/Model_SAE/__init__.py | 2 +- models/__init__.py | 10 +- nnlib/CAInitializer.py | 224 +- nnlib/__init__.py | 2 +- nnlib/device.py | 714 ++--- nnlib/nnlib.py | 2096 ++++++------- nnlib/pynvml.py | 3452 ++++++++++----------- requirements-colab.txt | 18 +- requirements-cpu.txt | 18 +- requirements-cuda.txt | 22 +- requirements-opencl.txt | 22 +- samplelib/Sample.py | 148 +- samplelib/SampleGeneratorBase.py | 48 +- samplelib/SampleGeneratorFace.py | 284 +- samplelib/SampleGeneratorFaceTemporal.py | 168 +- samplelib/SampleGeneratorImageTemporal.py | 156 +- samplelib/SampleLoader.py | 304 +- samplelib/SampleProcessor.py | 632 ++-- samplelib/__init__.py | 16 +- utils/DFLJPG.py | 606 ++-- utils/DFLPNG.py | 820 ++--- utils/Path_utils.py | 164 +- utils/cv2_utils.py | 44 +- utils/iter_utils.py | 140 +- utils/os_utils.py | 50 +- utils/pickle_utils.py | 16 +- utils/random_utils.py | 28 +- utils/std_utils.py | 72 +- utils/struct_utils.py | 10 +- 96 files changed, 15792 insertions(+), 15792 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f3d856b..e8ce932 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,19 +1,19 @@ -THIS IS NOT TECH SUPPORT FOR NEWBIE FAKERS -POST ONLY ISSUES RELATED TO BUGS OR CODE - -## Expected behavior - -*Describe, in some detail, what you are trying to do and what the output is that you expect from the program.* - -## Actual behavior - -*Describe, in some detail, what the program does instead. Be sure to include any error message or screenshots.* - -## Steps to reproduce - -*Describe, in some detail, the steps you tried that resulted in the behavior described above.* - -## Other relevant information -- **Command lined used (if not specified in steps to reproduce)**: main.py ... -- **Operating system and version:** Windows, macOS, Linux +THIS IS NOT TECH SUPPORT FOR NEWBIE FAKERS +POST ONLY ISSUES RELATED TO BUGS OR CODE + +## Expected behavior + +*Describe, in some detail, what you are trying to do and what the output is that you expect from the program.* + +## Actual behavior + +*Describe, in some detail, what the program does instead. Be sure to include any error message or screenshots.* + +## Steps to reproduce + +*Describe, in some detail, the steps you tried that resulted in the behavior described above.* + +## Other relevant information +- **Command lined used (if not specified in steps to reproduce)**: main.py ... +- **Operating system and version:** Windows, macOS, Linux - **Python version:** 3.5, 3.6.4, ... (if you are not using prebuilt windows binary) \ No newline at end of file diff --git a/.gitignore b/.gitignore index 458a632..493d67f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,10 @@ -* -!*.py -!*.md -!*.txt -!*.jpg -!requirements* -!Dockerfile* -!*.sh -train.py -convert.py +* +!*.py +!*.md +!*.txt +!*.jpg +!requirements* +!Dockerfile* +!*.sh +train.py +convert.py diff --git a/CODEGUIDELINES b/CODEGUIDELINES index 7e6541e..0d40a02 100644 --- a/CODEGUIDELINES +++ b/CODEGUIDELINES @@ -1,5 +1,5 @@ -Please don't ruin the code and this good (as I think) architecture. - -Please follow the same logic and brevity/pithiness. 
- +Please don't ruin the code and this good (as I think) architecture. + +Please follow the same logic and brevity/pithiness. + Don't abstract the code into huge classes if you only win some lines of code in one place, because this can prevent programmers from understanding it quickly. \ No newline at end of file diff --git a/LICENSE b/LICENSE index 818433e..94a9ed0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,674 +1,674 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. 
- - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. 
A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. 
If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). 
To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. 
- - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. 
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<https://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<https://www.gnu.org/licenses/why-not-lgpl.html>.
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index fdce45d..023b5b4 100644
--- a/README.md
+++ b/README.md
@@ -1,53 +1,53 @@
<<<<<<< HEAD
-![](doc/DFL_welcome.jpg)
-
-![](doc/logo_cuda.jpg)![](doc/logo_opencl.jpg)![](doc/logo_keras.jpg)![](doc/logo_tensorflow.jpg)![](doc/logo_plaidml.jpg)
-
-#deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets
-
-## **DeepFaceLab** is a tool that utilizes machine learning to replace faces in videos.
-
-If you like this software, please consider a donation.
-
-GOAL: next DeepFacelab update.
- -[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065) - -[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted) - -[Donate via Alipay](https://i.loli.net/2019/01/13/5c3ae3829809f.jpg) - -bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr - -- ### [Gallery](doc/gallery/doc_gallery.md) - -- ### Manuals: - -[English (google translated)](doc/manual_en_google_translated.pdf) - -[На русском](doc/manual_ru.pdf) - -- ### [Prebuilt windows app](doc/doc_prebuilt_windows_app.md) - -- ### Forks - -[Google Colab fork](https://github.com/chervonij/DFL-Colab) by @chervonij - -[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs - may be outdated - -- ### [Ready to work facesets](doc/doc_ready_to_work_facesets.md) - -- ### [Build and repository info](doc/doc_build_and_repository_info.md) - -- ### Communication groups: - -(Chinese) QQ group 951138799 for ML/AI experts - -[deepfakes (Chinese)](https://deepfakescn.com) - -[deepfakes (Chinese) (outdated) ](https://deepfakes.com.cn/) - -[reddit (English)](https://www.reddit.com/r/GifFakes/new/) +![](doc/DFL_welcome.jpg) + +![](doc/logo_cuda.jpg)![](doc/logo_opencl.jpg)![](doc/logo_keras.jpg)![](doc/logo_tensorflow.jpg)![](doc/logo_plaidml.jpg) + +#deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets + +## **DeepFaceLab** is a tool that utilizes machine learning to replace faces in videos. + +If you like this software, please consider a donation. + +GOAL: next DeepFacelab update. + +[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065) + +[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted) + +[Donate via Alipay](https://i.loli.net/2019/01/13/5c3ae3829809f.jpg) + +bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr + +- ### [Gallery](doc/gallery/doc_gallery.md) + +- ### Manuals: + +[English (google translated)](doc/manual_en_google_translated.pdf) + +[На русском](doc/manual_ru.pdf) + +- ### [Prebuilt windows app](doc/doc_prebuilt_windows_app.md) + +- ### Forks + +[Google Colab fork](https://github.com/chervonij/DFL-Colab) by @chervonij + +[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs - may be outdated + +- ### [Ready to work facesets](doc/doc_ready_to_work_facesets.md) + +- ### [Build and repository info](doc/doc_build_and_repository_info.md) + +- ### Communication groups: + +(Chinese) QQ group 951138799 for ML/AI experts + +[deepfakes (Chinese)](https://deepfakescn.com) + +[deepfakes (Chinese) (outdated) ](https://deepfakes.com.cn/) + +[reddit (English)](https://www.reddit.com/r/GifFakes/new/) ======= ![](doc/DFL_welcome.jpg) diff --git a/converters/Converter.py b/converters/Converter.py index f6e90d1..b4e4213 100644 --- a/converters/Converter.py +++ b/converters/Converter.py @@ -1,50 +1,50 @@ -import copy -''' -You can implement your own Converter, check example ConverterMasked.py -''' - -class Converter(object): - TYPE_FACE = 0 #calls convert_face - TYPE_FACE_AVATAR = 1 #calls convert_face with avatar_operator_face - TYPE_IMAGE = 2 #calls convert_image without landmarks - TYPE_IMAGE_WITH_LANDMARKS = 3 #calls convert_image with landmarks - - #overridable - def __init__(self, 
predictor_func, type): - self.predictor_func = predictor_func - self.type = type - - #overridable - def on_cli_initialize(self): - #cli initialization - pass - - #overridable - def on_host_tick(self): - pass - - #overridable - def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): - #return float32 image - #if debug , return tuple ( images of any size and channels, ...) - return image - - #overridable - def cli_convert_image (self, img_bgr, img_landmarks, debug): - #img_landmarks not None, if input image is png with embedded data - #return float32 image - #if debug , return tuple ( images of any size and channels, ...) - return image - - #overridable - def dummy_predict(self): - #do dummy predict here - pass - - def copy(self): - return copy.copy(self) - - def copy_and_set_predictor(self, predictor_func): - result = self.copy() - result.predictor_func = predictor_func - return result +import copy +''' +You can implement your own Converter, check example ConverterMasked.py +''' + +class Converter(object): + TYPE_FACE = 0 #calls convert_face + TYPE_FACE_AVATAR = 1 #calls convert_face with avatar_operator_face + TYPE_IMAGE = 2 #calls convert_image without landmarks + TYPE_IMAGE_WITH_LANDMARKS = 3 #calls convert_image with landmarks + + #overridable + def __init__(self, predictor_func, type): + self.predictor_func = predictor_func + self.type = type + + #overridable + def on_cli_initialize(self): + #cli initialization + pass + + #overridable + def on_host_tick(self): + pass + + #overridable + def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): + #return float32 image + #if debug , return tuple ( images of any size and channels, ...) + return image + + #overridable + def cli_convert_image (self, img_bgr, img_landmarks, debug): + #img_landmarks not None, if input image is png with embedded data + #return float32 image + #if debug , return tuple ( images of any size and channels, ...) + return image + + #overridable + def dummy_predict(self): + #do dummy predict here + pass + + def copy(self): + return copy.copy(self) + + def copy_and_set_predictor(self, predictor_func): + result = self.copy() + result.predictor_func = predictor_func + return result diff --git a/converters/ConverterAvatar.py b/converters/ConverterAvatar.py index 35da3bc..4a9e9b5 100644 --- a/converters/ConverterAvatar.py +++ b/converters/ConverterAvatar.py @@ -1,70 +1,70 @@ -import time - -import cv2 -import numpy as np - -from facelib import FaceType, LandmarksProcessor -from joblib import SubprocessFunctionCaller -from utils.pickle_utils import AntiPickler - -from .Converter import Converter - -class ConverterAvatar(Converter): - - #override - def __init__(self, predictor_func, - predictor_input_size=0): - - super().__init__(predictor_func, Converter.TYPE_FACE_AVATAR) - - self.predictor_input_size = predictor_input_size - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower - predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ), - np.zeros ( (predictor_input_size,predictor_input_size,1), dtype=np.float32 ) ) - time.sleep(2) - - predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) - self.predictor_func_host = AntiPickler(predictor_func_host) - self.predictor_func = predictor_func - - #overridable - def on_host_tick(self): - self.predictor_func_host.obj.process_messages() - - #override - def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): - if debug: - debugs = [img_bgr.copy()] - - img_size = img_bgr.shape[1], img_bgr.shape[0] - - img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) - img_face_mask_aaa = np.repeat(img_face_mask_a, 3, -1) - - output_size = self.predictor_input_size - face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=FaceType.FULL) - - dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC ) - - predictor_input_dst_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) - prd_inp_dst_face_mask_a = predictor_input_dst_face_mask_a_0[...,np.newaxis] - - prd_inp_avaperator_face_bgr = cv2.resize (avaperator_face_bgr, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) - - prd_face_bgr = self.predictor_func ( prd_inp_avaperator_face_bgr, prd_inp_dst_face_mask_a ) - - out_img = img_bgr.copy() - out_img = cv2.warpAffine( prd_face_bgr, face_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip(out_img, 0.0, 1.0) - - if debug: - debugs += [out_img.copy()] - - out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 ) - - if debug: - debugs += [out_img.copy()] - - - return debugs if debug else out_img +import time + +import cv2 +import numpy as np + +from facelib import FaceType, LandmarksProcessor +from joblib import SubprocessFunctionCaller +from utils.pickle_utils import AntiPickler + +from .Converter import Converter + +class ConverterAvatar(Converter): + + #override + def __init__(self, predictor_func, + predictor_input_size=0): + + super().__init__(predictor_func, Converter.TYPE_FACE_AVATAR) + + self.predictor_input_size = predictor_input_size + + #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower + predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ), + np.zeros ( (predictor_input_size,predictor_input_size,1), dtype=np.float32 ) ) + time.sleep(2) + + predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) + self.predictor_func_host = AntiPickler(predictor_func_host) + self.predictor_func = predictor_func + + #overridable + def on_host_tick(self): + self.predictor_func_host.obj.process_messages() + + #override + def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs): + if debug: + debugs = [img_bgr.copy()] + + img_size = img_bgr.shape[1], img_bgr.shape[0] + + img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) + img_face_mask_aaa = np.repeat(img_face_mask_a, 3, -1) + + output_size = self.predictor_input_size + face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=FaceType.FULL) + + dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC ) + + predictor_input_dst_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) + prd_inp_dst_face_mask_a = predictor_input_dst_face_mask_a_0[...,np.newaxis] + + prd_inp_avaperator_face_bgr = cv2.resize (avaperator_face_bgr, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC ) + + prd_face_bgr = self.predictor_func ( prd_inp_avaperator_face_bgr, prd_inp_dst_face_mask_a ) + + out_img = img_bgr.copy() + out_img = cv2.warpAffine( prd_face_bgr, face_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip(out_img, 0.0, 1.0) + + if debug: + debugs += [out_img.copy()] + + out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 ) + + if debug: + debugs += [out_img.copy()] + + + return debugs if debug else out_img diff --git a/converters/ConverterImage.py b/converters/ConverterImage.py index 8324002..58b1faa 100644 --- a/converters/ConverterImage.py +++ b/converters/ConverterImage.py @@ -1,50 +1,50 @@ -import time - -import cv2 -import numpy as np - -from facelib import FaceType, LandmarksProcessor -from joblib import SubprocessFunctionCaller -from utils.pickle_utils import AntiPickler - -from .Converter import Converter - -class ConverterImage(Converter): - - #override - def __init__(self, predictor_func, - predictor_input_size=0): - - super().__init__(predictor_func, Converter.TYPE_IMAGE) - - self.predictor_input_size = predictor_input_size - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower - predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) - time.sleep(2) - - predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) - self.predictor_func_host = AntiPickler(predictor_func_host) - self.predictor_func = predictor_func - - #overridable - def on_host_tick(self): - self.predictor_func_host.obj.process_messages() - - #override - def cli_convert_image (self, img_bgr, img_landmarks, debug): - img_size = img_bgr.shape[1], img_bgr.shape[0] - - predictor_input_bgr = cv2.resize ( img_bgr, (self.predictor_input_size, self.predictor_input_size), cv2.INTER_LANCZOS4 ) - - if debug: - debugs = [predictor_input_bgr] - - output = self.predictor_func ( predictor_input_bgr ) - - if debug: - return (predictor_input_bgr,output,) - if debug: - debugs += [out_img.copy()] - - return debugs if debug else output +import time + +import cv2 +import numpy as np + +from facelib import FaceType, LandmarksProcessor +from joblib import SubprocessFunctionCaller +from utils.pickle_utils import AntiPickler + +from .Converter import Converter + +class ConverterImage(Converter): + + #override + def __init__(self, predictor_func, + predictor_input_size=0): + + super().__init__(predictor_func, Converter.TYPE_IMAGE) + + self.predictor_input_size = predictor_input_size + + #dummy predict and sleep, tensorflow caching kernels. If remove it, conversion speed will be x2 slower + predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) + time.sleep(2) + + predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) + self.predictor_func_host = AntiPickler(predictor_func_host) + self.predictor_func = predictor_func + + #overridable + def on_host_tick(self): + self.predictor_func_host.obj.process_messages() + + #override + def cli_convert_image (self, img_bgr, img_landmarks, debug): + img_size = img_bgr.shape[1], img_bgr.shape[0] + + predictor_input_bgr = cv2.resize ( img_bgr, (self.predictor_input_size, self.predictor_input_size), cv2.INTER_LANCZOS4 ) + + if debug: + debugs = [predictor_input_bgr] + + output = self.predictor_func ( predictor_input_bgr ) + + if debug: + return (predictor_input_bgr,output,) + if debug: + debugs += [out_img.copy()] + + return debugs if debug else output diff --git a/converters/ConverterMasked.py b/converters/ConverterMasked.py index 4f6f372..4c2439a 100644 --- a/converters/ConverterMasked.py +++ b/converters/ConverterMasked.py @@ -1,436 +1,436 @@ -import time -import traceback - -import cv2 -import numpy as np - -import imagelib -from facelib import FaceType, FANSegmentator, LandmarksProcessor -from interact import interact as io -from joblib import SubprocessFunctionCaller -from utils.pickle_utils import AntiPickler - -from .Converter import Converter - - -''' -default_mode = {1:'overlay', - 2:'hist-match', - 3:'hist-match-bw', - 4:'seamless', - 5:'seamless-hist-match', - 6:'raw'} -''' -class ConverterMasked(Converter): - - #override - def __init__(self, predictor_func, - predictor_input_size=0, - predictor_masked=True, - face_type=FaceType.FULL, - default_mode = 4, - base_erode_mask_modifier = 0, - base_blur_mask_modifier = 0, - default_erode_mask_modifier = 0, - default_blur_mask_modifier = 0, - clip_hborder_mask_per = 0, - force_mask_mode=-1): - - super().__init__(predictor_func, Converter.TYPE_FACE) - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, conversion speed will be x2 slower - predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) - time.sleep(2) - - predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) - self.predictor_func_host = AntiPickler(predictor_func_host) - self.predictor_func = predictor_func - - self.predictor_masked = predictor_masked - self.predictor_input_size = predictor_input_size - self.face_type = face_type - self.clip_hborder_mask_per = clip_hborder_mask_per - - mode = io.input_int ("Choose mode: (1) overlay, (2) hist match, (3) hist match bw, (4) seamless, (5) raw. Default - %d : " % (default_mode) , default_mode) - - mode_dict = {1:'overlay', - 2:'hist-match', - 3:'hist-match-bw', - 4:'seamless', - 5:'raw'} - - self.mode = mode_dict.get (mode, mode_dict[default_mode] ) - - if self.mode == 'raw': - mode = io.input_int ("Choose raw mode: (1) rgb, (2) rgb+mask (default), (3) mask only, (4) predicted only : ", 2) - self.raw_mode = {1:'rgb', - 2:'rgb-mask', - 3:'mask-only', - 4:'predicted-only'}.get (mode, 'rgb-mask') - - if self.mode != 'raw': - - if self.mode == 'seamless': - if io.input_bool("Seamless hist match? (y/n skip:n) : ", False): - self.mode = 'seamless-hist-match' - - if self.mode == 'hist-match' or self.mode == 'hist-match-bw': - self.masked_hist_match = io.input_bool("Masked hist match? (y/n skip:y) : ", True) - - if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': - self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) - - if force_mask_mode != -1: - self.mask_mode = force_mask_mode - else: - if face_type == FaceType.FULL: - self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst, (3) FAN-prd, (4) FAN-dst , (5) FAN-prd*FAN-dst (6) learned*FAN-prd*FAN-dst (?) help. Default - %d : " % (1) , 1, help_message="If you learned mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks."), 1, 6 ) - else: - self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst . Default - %d : " % (1) , 1), 1, 2 ) - - if self.mask_mode >= 3 and self.mask_mode <= 6: - self.fan_seg = None - - if self.mode != 'raw': - self.erode_mask_modifier = base_erode_mask_modifier + np.clip ( io.input_int ("Choose erode mask modifier [-200..200] (skip:%d) : " % (default_erode_mask_modifier), default_erode_mask_modifier), -200, 200) - self.blur_mask_modifier = base_blur_mask_modifier + np.clip ( io.input_int ("Choose blur mask modifier [-200..200] (skip:%d) : " % (default_blur_mask_modifier), default_blur_mask_modifier), -200, 200) - - self.output_face_scale = np.clip ( 1.0 + io.input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0)*0.01, 0.5, 1.5) - - if self.mode != 'raw': - self.color_transfer_mode = io.input_str ("Apply color transfer to predicted face? Choose mode ( rct/lct skip:None ) : ", None, ['rct','lct']) - - self.super_resolution = io.input_bool("Apply super resolution? 
(y/n ?:help skip:n) : ", False, help_message="Enhance details by applying DCSCN network.") - - if self.mode != 'raw': - self.final_image_color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) - self.alpha = io.input_bool("Export png with alpha channel? (y/n skip:n) : ", False) - - io.log_info ("") - - if self.super_resolution: - host_proc, dc_upscale = SubprocessFunctionCaller.make_pair( imagelib.DCSCN().upscale ) - self.dc_host = AntiPickler(host_proc) - self.dc_upscale = dc_upscale - else: - self.dc_host = None - - #overridable - def on_host_tick(self): - self.predictor_func_host.obj.process_messages() - - if self.dc_host is not None: - self.dc_host.obj.process_messages() - - #overridable - def on_cli_initialize(self): - if (self.mask_mode >= 3 and self.mask_mode <= 6) and self.fan_seg == None: - self.fan_seg = FANSegmentator(256, FaceType.toString( self.face_type ) ) - - #override - def cli_convert_face (self, img_bgr, img_face_landmarks, debug, **kwargs): - if debug: - debugs = [img_bgr.copy()] - - img_size = img_bgr.shape[1], img_bgr.shape[0] - - img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) - - output_size = self.predictor_input_size - if self.super_resolution: - output_size *= 2 - - face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type) - face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type, scale=self.output_face_scale) - - dst_face_bgr = cv2.warpAffine( img_bgr , face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) - dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) - - predictor_input_bgr = cv2.resize (dst_face_bgr, (self.predictor_input_size,self.predictor_input_size)) - - if self.predictor_masked: - prd_face_bgr, prd_face_mask_a_0 = self.predictor_func (predictor_input_bgr) - - prd_face_bgr = np.clip (prd_face_bgr, 0, 1.0 ) - prd_face_mask_a_0 = np.clip (prd_face_mask_a_0, 0.0, 1.0) - else: - predicted = self.predictor_func (predictor_input_bgr) - prd_face_bgr = np.clip (predicted, 0, 1.0 ) - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size)) - - if self.super_resolution: - if debug: - tmp = cv2.resize (prd_face_bgr, (output_size,output_size), cv2.INTER_CUBIC) - debugs += [ np.clip( cv2.warpAffine( tmp, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - prd_face_bgr = self.dc_upscale(prd_face_bgr) - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - if self.predictor_masked: - prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) - else: - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) - - if self.mask_mode == 2: #dst - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - elif self.mask_mode >= 3 and self.mask_mode <= 6: - - if self.mask_mode == 3 or self.mask_mode == 5 or self.mask_mode == 6: - prd_face_bgr_256 = cv2.resize (prd_face_bgr, (256,256) ) - prd_face_bgr_256_mask = self.fan_seg.extract( prd_face_bgr_256 ) - FAN_prd_face_mask_a_0 = cv2.resize (prd_face_bgr_256_mask, 
(output_size,output_size), cv2.INTER_CUBIC) - - if self.mask_mode == 4 or self.mask_mode == 5 or self.mask_mode == 6: - face_256_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, 256, face_type=FaceType.FULL) - dst_face_256_bgr = cv2.warpAffine(img_bgr, face_256_mat, (256, 256), flags=cv2.INTER_LANCZOS4 ) - dst_face_256_mask = self.fan_seg.extract( dst_face_256_bgr ) - FAN_dst_face_mask_a_0 = cv2.resize (dst_face_256_mask, (output_size,output_size), cv2.INTER_CUBIC) - - if self.mask_mode == 3: #FAN-prd - prd_face_mask_a_0 = FAN_prd_face_mask_a_0 - elif self.mask_mode == 4: #FAN-dst - prd_face_mask_a_0 = FAN_dst_face_mask_a_0 - elif self.mask_mode == 5: - prd_face_mask_a_0 = FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 - elif self.mask_mode == 6: - prd_face_mask_a_0 = prd_face_mask_a_0 * FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 - - prd_face_mask_a_0[ prd_face_mask_a_0 < 0.001 ] = 0.0 - - prd_face_mask_a = prd_face_mask_a_0[...,np.newaxis] - prd_face_mask_aaa = np.repeat (prd_face_mask_a, (3,), axis=-1) - - img_face_mask_aaa = cv2.warpAffine( prd_face_mask_aaa, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) - img_face_mask_aaa = np.clip (img_face_mask_aaa, 0.0, 1.0) - img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise - - if debug: - debugs += [img_face_mask_aaa.copy()] - - - out_img = img_bgr.copy() - - if self.mode == 'raw': - if self.raw_mode == 'rgb' or self.raw_mode == 'rgb-mask': - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - - if self.raw_mode == 'rgb-mask': - out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 ) - - if self.raw_mode == 'mask-only': - out_img = img_face_mask_aaa - - if self.raw_mode == 'predicted-only': - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(out_img.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - - else: - #averaging [lenx, leny, maskx, masky] by grayscale gradients of upscaled mask - ar = [] - for i in range(1, 10): - maxregion = np.argwhere( img_face_mask_aaa > i / 10.0 ) - if maxregion.size != 0: - miny,minx = maxregion.min(axis=0)[:2] - maxy,maxx = maxregion.max(axis=0)[:2] - lenx = maxx - minx - leny = maxy - miny - if min(lenx,leny) >= 4: - ar += [ [ lenx, leny] ] - - if len(ar) > 0: - lenx, leny = np.mean ( ar, axis=0 ) - lowest_len = min (lenx, leny) - if debug: - io.log_info ("lenx/leny:(%d/%d) " % (lenx, leny ) ) - io.log_info ("lowest_len = %f" % (lowest_len) ) - - if self.erode_mask_modifier != 0: - ero = int( lowest_len * ( 0.126 - lowest_len * 0.00004551365 ) * 0.01*self.erode_mask_modifier ) - if debug: - io.log_info ("erode_size = %d" % (ero) ) - if ero > 0: - img_face_mask_aaa = cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) - elif ero < 0: - img_face_mask_aaa = cv2.dilate(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) - - img_mask_blurry_aaa = img_face_mask_aaa - - if self.clip_hborder_mask_per > 0: #clip hborder before blur - prd_hborder_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=np.float32) - prd_border_size = int ( prd_hborder_rect_mask_a.shape[1] * self.clip_hborder_mask_per ) - prd_hborder_rect_mask_a[:,0:prd_border_size,:] = 0 - prd_hborder_rect_mask_a[:,-prd_border_size:,:] = 0 - 
prd_hborder_rect_mask_a[-prd_border_size:,:,:] = 0 - prd_hborder_rect_mask_a = np.expand_dims(cv2.blur(prd_hborder_rect_mask_a, (prd_border_size, prd_border_size) ),-1) - - img_prd_hborder_rect_mask_a = cv2.warpAffine( prd_hborder_rect_mask_a, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) - img_prd_hborder_rect_mask_a = np.expand_dims (img_prd_hborder_rect_mask_a, -1) - img_mask_blurry_aaa *= img_prd_hborder_rect_mask_a - img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) - - if debug: - debugs += [img_mask_blurry_aaa.copy()] - - if self.blur_mask_modifier > 0: - blur = int( lowest_len * 0.10 * 0.01*self.blur_mask_modifier ) - if debug: - io.log_info ("blur_size = %d" % (blur) ) - if blur > 0: - img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) ) - - img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) - face_mask_blurry_aaa = cv2.warpAffine( img_mask_blurry_aaa, face_mat, (output_size, output_size) ) - - if debug: - debugs += [img_mask_blurry_aaa.copy()] - - if 'seamless' not in self.mode and self.color_transfer_mode is not None: - if self.color_transfer_mode == 'rct': - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (prd_face_bgr*255).astype(np.uint8), 0, 255), - np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), - source_mask=prd_face_mask_a, target_mask=prd_face_mask_a) - prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - - elif self.color_transfer_mode == 'lct': - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr) - prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - if self.mode == 'hist-match-bw': - prd_face_bgr = cv2.cvtColor(prd_face_bgr, cv2.COLOR_BGR2GRAY) - prd_face_bgr = np.repeat( np.expand_dims (prd_face_bgr, -1), (3,), -1 ) - - if self.mode == 'hist-match' or self.mode == 'hist-match-bw': - if debug: - debugs += [ cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) ] - - hist_mask_a = np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) - - if self.masked_hist_match: - hist_mask_a *= prd_face_mask_a - - white = (1.0-hist_mask_a)* np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) - - hist_match_1 = prd_face_bgr*hist_mask_a + white - hist_match_1[ hist_match_1 > 1.0 ] = 1.0 - - hist_match_2 = dst_face_bgr*hist_mask_a + white - hist_match_2[ hist_match_1 > 1.0 ] = 1.0 - - prd_face_bgr = imagelib.color_hist_match(hist_match_1, hist_match_2, self.hist_match_threshold ) - - #if self.masked_hist_match: - # prd_face_bgr -= white - - if 
self.mode == 'hist-match-bw': - prd_face_bgr = prd_face_bgr.astype(dtype=np.float32) - - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip(out_img, 0.0, 1.0) - - if debug: - debugs += [out_img.copy()] - - if self.mode == 'overlay': - pass - - if 'seamless' in self.mode: - #mask used for cv2.seamlessClone - img_face_seamless_mask_a = None - img_face_mask_a = img_mask_blurry_aaa[...,0:1] - for i in range(1,10): - a = img_face_mask_a > i / 10.0 - if len(np.argwhere(a)) == 0: - continue - img_face_seamless_mask_a = img_mask_blurry_aaa[...,0:1].copy() - img_face_seamless_mask_a[a] = 1.0 - img_face_seamless_mask_a[img_face_seamless_mask_a <= i / 10.0] = 0.0 - break - - try: - #calc same bounding rect and center point as in cv2.seamlessClone to prevent jittering - l,t,w,h = cv2.boundingRect( (img_face_seamless_mask_a*255).astype(np.uint8) ) - s_maskx, s_masky = int(l+w/2), int(t+h/2) - - out_img = cv2.seamlessClone( (out_img*255).astype(np.uint8), (img_bgr*255).astype(np.uint8), (img_face_seamless_mask_a*255).astype(np.uint8), (s_maskx,s_masky) , cv2.NORMAL_CLONE ) - out_img = out_img.astype(dtype=np.float32) / 255.0 - except Exception as e: - #seamlessClone may fail in some cases - e_str = traceback.format_exc() - - if 'MemoryError' in e_str: - raise Exception("Seamless fail: " + e_str) #reraise MemoryError in order to reprocess this data by other processes - else: - print ("Seamless fail: " + e_str) - - if debug: - debugs += [out_img.copy()] - - out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (out_img*img_mask_blurry_aaa) , 0, 1.0 ) - - if 'seamless' in self.mode and self.color_transfer_mode is not None: - out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) - - if self.color_transfer_mode == 'rct': - if debug: - debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - new_out_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (out_face_bgr*255).astype(np.uint8), 0, 255), - np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), - source_mask=face_mask_blurry_aaa, target_mask=face_mask_blurry_aaa) - new_out_face_bgr = np.clip( new_out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - - elif self.color_transfer_mode == 'lct': - if debug: - debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - new_out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr) - new_out_face_bgr = np.clip( new_out_face_bgr, 0.0, 1.0) - - if debug: - debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] - - new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 ) - - if self.mode == 'seamless-hist-match': - out_face_bgr 
= cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) - new_out_face_bgr = imagelib.color_hist_match(out_face_bgr, dst_face_bgr, self.hist_match_threshold) - new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 ) - - if self.final_image_color_degrade_power != 0: - if debug: - debugs += [out_img.copy()] - out_img_reduced = imagelib.reduce_colors(out_img, 256) - if self.final_image_color_degrade_power == 100: - out_img = out_img_reduced - else: - alpha = self.final_image_color_degrade_power / 100.0 - out_img = (out_img*(1.0-alpha) + out_img_reduced*alpha) - - if self.alpha: - out_img = np.concatenate ( [out_img, np.expand_dims (img_mask_blurry_aaa[:,:,0],-1)], -1 ) - - out_img = np.clip (out_img, 0.0, 1.0 ) - - if debug: - debugs += [out_img.copy()] - +import time +import traceback + +import cv2 +import numpy as np + +import imagelib +from facelib import FaceType, FANSegmentator, LandmarksProcessor +from interact import interact as io +from joblib import SubprocessFunctionCaller +from utils.pickle_utils import AntiPickler + +from .Converter import Converter + + +''' +default_mode = {1:'overlay', + 2:'hist-match', + 3:'hist-match-bw', + 4:'seamless', + 5:'seamless-hist-match', + 6:'raw'} +''' +class ConverterMasked(Converter): + + #override + def __init__(self, predictor_func, + predictor_input_size=0, + predictor_masked=True, + face_type=FaceType.FULL, + default_mode = 4, + base_erode_mask_modifier = 0, + base_blur_mask_modifier = 0, + default_erode_mask_modifier = 0, + default_blur_mask_modifier = 0, + clip_hborder_mask_per = 0, + force_mask_mode=-1): + + super().__init__(predictor_func, Converter.TYPE_FACE) + + #dummy predict and sleep, tensorflow caching kernels. If remove it, conversion speed will be x2 slower + predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) ) + time.sleep(2) + + predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) + self.predictor_func_host = AntiPickler(predictor_func_host) + self.predictor_func = predictor_func + + self.predictor_masked = predictor_masked + self.predictor_input_size = predictor_input_size + self.face_type = face_type + self.clip_hborder_mask_per = clip_hborder_mask_per + + mode = io.input_int ("Choose mode: (1) overlay, (2) hist match, (3) hist match bw, (4) seamless, (5) raw. Default - %d : " % (default_mode) , default_mode) + + mode_dict = {1:'overlay', + 2:'hist-match', + 3:'hist-match-bw', + 4:'seamless', + 5:'raw'} + + self.mode = mode_dict.get (mode, mode_dict[default_mode] ) + + if self.mode == 'raw': + mode = io.input_int ("Choose raw mode: (1) rgb, (2) rgb+mask (default), (3) mask only, (4) predicted only : ", 2) + self.raw_mode = {1:'rgb', + 2:'rgb-mask', + 3:'mask-only', + 4:'predicted-only'}.get (mode, 'rgb-mask') + + if self.mode != 'raw': + + if self.mode == 'seamless': + if io.input_bool("Seamless hist match? (y/n skip:n) : ", False): + self.mode = 'seamless-hist-match' + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw': + self.masked_hist_match = io.input_bool("Masked hist match? 
(y/n skip:y) : ", True) + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': + self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) + + if force_mask_mode != -1: + self.mask_mode = force_mask_mode + else: + if face_type == FaceType.FULL: + self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst, (3) FAN-prd, (4) FAN-dst , (5) FAN-prd*FAN-dst (6) learned*FAN-prd*FAN-dst (?) help. Default - %d : " % (1) , 1, help_message="If you learned mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks."), 1, 6 ) + else: + self.mask_mode = np.clip ( io.input_int ("Mask mode: (1) learned, (2) dst . Default - %d : " % (1) , 1), 1, 2 ) + + if self.mask_mode >= 3 and self.mask_mode <= 6: + self.fan_seg = None + + if self.mode != 'raw': + self.erode_mask_modifier = base_erode_mask_modifier + np.clip ( io.input_int ("Choose erode mask modifier [-200..200] (skip:%d) : " % (default_erode_mask_modifier), default_erode_mask_modifier), -200, 200) + self.blur_mask_modifier = base_blur_mask_modifier + np.clip ( io.input_int ("Choose blur mask modifier [-200..200] (skip:%d) : " % (default_blur_mask_modifier), default_blur_mask_modifier), -200, 200) + + self.output_face_scale = np.clip ( 1.0 + io.input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0)*0.01, 0.5, 1.5) + + if self.mode != 'raw': + self.color_transfer_mode = io.input_str ("Apply color transfer to predicted face? Choose mode ( rct/lct skip:None ) : ", None, ['rct','lct']) + + self.super_resolution = io.input_bool("Apply super resolution? (y/n ?:help skip:n) : ", False, help_message="Enhance details by applying DCSCN network.") + + if self.mode != 'raw': + self.final_image_color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) + self.alpha = io.input_bool("Export png with alpha channel? 
(y/n skip:n) : ", False) + + io.log_info ("") + + if self.super_resolution: + host_proc, dc_upscale = SubprocessFunctionCaller.make_pair( imagelib.DCSCN().upscale ) + self.dc_host = AntiPickler(host_proc) + self.dc_upscale = dc_upscale + else: + self.dc_host = None + + #overridable + def on_host_tick(self): + self.predictor_func_host.obj.process_messages() + + if self.dc_host is not None: + self.dc_host.obj.process_messages() + + #overridable + def on_cli_initialize(self): + if (self.mask_mode >= 3 and self.mask_mode <= 6) and self.fan_seg == None: + self.fan_seg = FANSegmentator(256, FaceType.toString( self.face_type ) ) + + #override + def cli_convert_face (self, img_bgr, img_face_landmarks, debug, **kwargs): + if debug: + debugs = [img_bgr.copy()] + + img_size = img_bgr.shape[1], img_bgr.shape[0] + + img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) + + output_size = self.predictor_input_size + if self.super_resolution: + output_size *= 2 + + face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type) + face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=self.face_type, scale=self.output_face_scale) + + dst_face_bgr = cv2.warpAffine( img_bgr , face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) + dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_LANCZOS4 ) + + predictor_input_bgr = cv2.resize (dst_face_bgr, (self.predictor_input_size,self.predictor_input_size)) + + if self.predictor_masked: + prd_face_bgr, prd_face_mask_a_0 = self.predictor_func (predictor_input_bgr) + + prd_face_bgr = np.clip (prd_face_bgr, 0, 1.0 ) + prd_face_mask_a_0 = np.clip (prd_face_mask_a_0, 0.0, 1.0) + else: + predicted = self.predictor_func (predictor_input_bgr) + prd_face_bgr = np.clip (predicted, 0, 1.0 ) + prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size)) + + if self.super_resolution: + if debug: + tmp = cv2.resize (prd_face_bgr, (output_size,output_size), cv2.INTER_CUBIC) + debugs += [ np.clip( cv2.warpAffine( tmp, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + prd_face_bgr = self.dc_upscale(prd_face_bgr) + if debug: + debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + if self.predictor_masked: + prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) + else: + prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) + + if self.mask_mode == 2: #dst + prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) + elif self.mask_mode >= 3 and self.mask_mode <= 6: + + if self.mask_mode == 3 or self.mask_mode == 5 or self.mask_mode == 6: + prd_face_bgr_256 = cv2.resize (prd_face_bgr, (256,256) ) + prd_face_bgr_256_mask = self.fan_seg.extract( prd_face_bgr_256 ) + FAN_prd_face_mask_a_0 = cv2.resize (prd_face_bgr_256_mask, (output_size,output_size), cv2.INTER_CUBIC) + + if self.mask_mode == 4 or self.mask_mode == 5 or self.mask_mode == 6: + face_256_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, 256, face_type=FaceType.FULL) + dst_face_256_bgr = cv2.warpAffine(img_bgr, face_256_mat, (256, 256), flags=cv2.INTER_LANCZOS4 ) + 
dst_face_256_mask = self.fan_seg.extract( dst_face_256_bgr ) + FAN_dst_face_mask_a_0 = cv2.resize (dst_face_256_mask, (output_size,output_size), cv2.INTER_CUBIC) + + if self.mask_mode == 3: #FAN-prd + prd_face_mask_a_0 = FAN_prd_face_mask_a_0 + elif self.mask_mode == 4: #FAN-dst + prd_face_mask_a_0 = FAN_dst_face_mask_a_0 + elif self.mask_mode == 5: + prd_face_mask_a_0 = FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 + elif self.mask_mode == 6: + prd_face_mask_a_0 = prd_face_mask_a_0 * FAN_prd_face_mask_a_0 * FAN_dst_face_mask_a_0 + + prd_face_mask_a_0[ prd_face_mask_a_0 < 0.001 ] = 0.0 + + prd_face_mask_a = prd_face_mask_a_0[...,np.newaxis] + prd_face_mask_aaa = np.repeat (prd_face_mask_a, (3,), axis=-1) + + img_face_mask_aaa = cv2.warpAffine( prd_face_mask_aaa, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) + img_face_mask_aaa = np.clip (img_face_mask_aaa, 0.0, 1.0) + img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise + + if debug: + debugs += [img_face_mask_aaa.copy()] + + + out_img = img_bgr.copy() + + if self.mode == 'raw': + if self.raw_mode == 'rgb' or self.raw_mode == 'rgb-mask': + out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + + if self.raw_mode == 'rgb-mask': + out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 ) + + if self.raw_mode == 'mask-only': + out_img = img_face_mask_aaa + + if self.raw_mode == 'predicted-only': + out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(out_img.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + + else: + #averaging [lenx, leny, maskx, masky] by grayscale gradients of upscaled mask + ar = [] + for i in range(1, 10): + maxregion = np.argwhere( img_face_mask_aaa > i / 10.0 ) + if maxregion.size != 0: + miny,minx = maxregion.min(axis=0)[:2] + maxy,maxx = maxregion.max(axis=0)[:2] + lenx = maxx - minx + leny = maxy - miny + if min(lenx,leny) >= 4: + ar += [ [ lenx, leny] ] + + if len(ar) > 0: + lenx, leny = np.mean ( ar, axis=0 ) + lowest_len = min (lenx, leny) + if debug: + io.log_info ("lenx/leny:(%d/%d) " % (lenx, leny ) ) + io.log_info ("lowest_len = %f" % (lowest_len) ) + + if self.erode_mask_modifier != 0: + ero = int( lowest_len * ( 0.126 - lowest_len * 0.00004551365 ) * 0.01*self.erode_mask_modifier ) + if debug: + io.log_info ("erode_size = %d" % (ero) ) + if ero > 0: + img_face_mask_aaa = cv2.erode(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) + elif ero < 0: + img_face_mask_aaa = cv2.dilate(img_face_mask_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) + + img_mask_blurry_aaa = img_face_mask_aaa + + if self.clip_hborder_mask_per > 0: #clip hborder before blur + prd_hborder_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=np.float32) + prd_border_size = int ( prd_hborder_rect_mask_a.shape[1] * self.clip_hborder_mask_per ) + prd_hborder_rect_mask_a[:,0:prd_border_size,:] = 0 + prd_hborder_rect_mask_a[:,-prd_border_size:,:] = 0 + prd_hborder_rect_mask_a[-prd_border_size:,:,:] = 0 + prd_hborder_rect_mask_a = np.expand_dims(cv2.blur(prd_hborder_rect_mask_a, (prd_border_size, prd_border_size) ),-1) + + img_prd_hborder_rect_mask_a = cv2.warpAffine( prd_hborder_rect_mask_a, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | 
cv2.INTER_LANCZOS4 ) + img_prd_hborder_rect_mask_a = np.expand_dims (img_prd_hborder_rect_mask_a, -1) + img_mask_blurry_aaa *= img_prd_hborder_rect_mask_a + img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) + + if debug: + debugs += [img_mask_blurry_aaa.copy()] + + if self.blur_mask_modifier > 0: + blur = int( lowest_len * 0.10 * 0.01*self.blur_mask_modifier ) + if debug: + io.log_info ("blur_size = %d" % (blur) ) + if blur > 0: + img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) ) + + img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) + face_mask_blurry_aaa = cv2.warpAffine( img_mask_blurry_aaa, face_mat, (output_size, output_size) ) + + if debug: + debugs += [img_mask_blurry_aaa.copy()] + + if 'seamless' not in self.mode and self.color_transfer_mode is not None: + if self.color_transfer_mode == 'rct': + if debug: + debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (prd_face_bgr*255).astype(np.uint8), 0, 255), + np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), + source_mask=prd_face_mask_a, target_mask=prd_face_mask_a) + prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) + + if debug: + debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + + elif self.color_transfer_mode == 'lct': + if debug: + debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr) + prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0) + + if debug: + debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + if self.mode == 'hist-match-bw': + prd_face_bgr = cv2.cvtColor(prd_face_bgr, cv2.COLOR_BGR2GRAY) + prd_face_bgr = np.repeat( np.expand_dims (prd_face_bgr, -1), (3,), -1 ) + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw': + if debug: + debugs += [ cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) ] + + hist_mask_a = np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) + + if self.masked_hist_match: + hist_mask_a *= prd_face_mask_a + + white = (1.0-hist_mask_a)* np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) + + hist_match_1 = prd_face_bgr*hist_mask_a + white + hist_match_1[ hist_match_1 > 1.0 ] = 1.0 + + hist_match_2 = dst_face_bgr*hist_mask_a + white + hist_match_2[ hist_match_1 > 1.0 ] = 1.0 + + prd_face_bgr = imagelib.color_hist_match(hist_match_1, hist_match_2, self.hist_match_threshold ) + + #if self.masked_hist_match: + # prd_face_bgr -= white + + if self.mode == 'hist-match-bw': + prd_face_bgr = prd_face_bgr.astype(dtype=np.float32) + + out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip(out_img, 0.0, 1.0) + + if debug: + debugs += [out_img.copy()] + + if self.mode == 
'overlay': + pass + + if 'seamless' in self.mode: + #mask used for cv2.seamlessClone + img_face_seamless_mask_a = None + img_face_mask_a = img_mask_blurry_aaa[...,0:1] + for i in range(1,10): + a = img_face_mask_a > i / 10.0 + if len(np.argwhere(a)) == 0: + continue + img_face_seamless_mask_a = img_mask_blurry_aaa[...,0:1].copy() + img_face_seamless_mask_a[a] = 1.0 + img_face_seamless_mask_a[img_face_seamless_mask_a <= i / 10.0] = 0.0 + break + + try: + #calc same bounding rect and center point as in cv2.seamlessClone to prevent jittering + l,t,w,h = cv2.boundingRect( (img_face_seamless_mask_a*255).astype(np.uint8) ) + s_maskx, s_masky = int(l+w/2), int(t+h/2) + + out_img = cv2.seamlessClone( (out_img*255).astype(np.uint8), (img_bgr*255).astype(np.uint8), (img_face_seamless_mask_a*255).astype(np.uint8), (s_maskx,s_masky) , cv2.NORMAL_CLONE ) + out_img = out_img.astype(dtype=np.float32) / 255.0 + except Exception as e: + #seamlessClone may fail in some cases + e_str = traceback.format_exc() + + if 'MemoryError' in e_str: + raise Exception("Seamless fail: " + e_str) #reraise MemoryError in order to reprocess this data by other processes + else: + print ("Seamless fail: " + e_str) + + if debug: + debugs += [out_img.copy()] + + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (out_img*img_mask_blurry_aaa) , 0, 1.0 ) + + if 'seamless' in self.mode and self.color_transfer_mode is not None: + out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) + + if self.color_transfer_mode == 'rct': + if debug: + debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + new_out_face_bgr = imagelib.reinhard_color_transfer ( np.clip( (out_face_bgr*255).astype(np.uint8), 0, 255), + np.clip( (dst_face_bgr*255).astype(np.uint8), 0, 255), + source_mask=face_mask_blurry_aaa, target_mask=face_mask_blurry_aaa) + new_out_face_bgr = np.clip( new_out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) + + if debug: + debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + + elif self.color_transfer_mode == 'lct': + if debug: + debugs += [ np.clip( cv2.warpAffine( out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + new_out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr) + new_out_face_bgr = np.clip( new_out_face_bgr, 0.0, 1.0) + + if debug: + debugs += [ np.clip( cv2.warpAffine( new_out_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ), 0, 1.0) ] + + new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 ) + + if self.mode == 'seamless-hist-match': + out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) ) + new_out_face_bgr = imagelib.color_hist_match(out_face_bgr, dst_face_bgr, self.hist_match_threshold) + new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = 
np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 )
+
+        if self.final_image_color_degrade_power != 0:
+            if debug:
+                debugs += [out_img.copy()]
+            out_img_reduced = imagelib.reduce_colors(out_img, 256)
+            if self.final_image_color_degrade_power == 100:
+                out_img = out_img_reduced
+            else:
+                alpha = self.final_image_color_degrade_power / 100.0
+                out_img = (out_img*(1.0-alpha) + out_img_reduced*alpha)
+
+        if self.alpha:
+            out_img = np.concatenate ( [out_img, np.expand_dims (img_mask_blurry_aaa[:,:,0],-1)], -1 )
+
+        out_img = np.clip (out_img, 0.0, 1.0 )
+
+        if debug:
+            debugs += [out_img.copy()]
+        return debugs if debug else out_img
\ No newline at end of file
diff --git a/converters/__init__.py b/converters/__init__.py
index be5d9c1..ef544cb 100644
--- a/converters/__init__.py
+++ b/converters/__init__.py
@@ -1,4 +1,4 @@
-from .Converter import Converter
-from .ConverterMasked import ConverterMasked
-from .ConverterImage import ConverterImage
-from .ConverterAvatar import ConverterAvatar
+from .Converter import Converter
+from .ConverterMasked import ConverterMasked
+from .ConverterImage import ConverterImage
+from .ConverterAvatar import ConverterAvatar
diff --git a/doc/doc_build_and_repository_info.md b/doc/doc_build_and_repository_info.md
index cf9b5b1..89a740e 100644
--- a/doc/doc_build_and_repository_info.md
+++ b/doc/doc_build_and_repository_info.md
@@ -1,5 +1,5 @@
-#### **CPU mode**
-
-It is possible to run from script for all stages using the `--cpu-only` flag. To run from script, install the separate dependencies for CPU mode using `pip -r requirements-cpu.txt`.
-
+#### **CPU mode**
+
+It is possible to run from script for all stages using the `--cpu-only` flag. To run from script, install the separate dependencies for CPU mode using `pip install -r requirements-cpu.txt`.
+
 Please note that extraction and training will take much longer without a GPU and performance will greatly suffer without one. In particular, do not use the DLIB extractor in CPU mode; it's too slow to run without a GPU. Train only on 64px resolution models like H64 or SAE (with low settings) and the lightweight encoder.
\ No newline at end of file
diff --git a/doc/doc_prebuilt_windows_app.md b/doc/doc_prebuilt_windows_app.md
index 0c0d37a..5746f5a 100644
--- a/doc/doc_prebuilt_windows_app.md
+++ b/doc/doc_prebuilt_windows_app.md
@@ -1,25 +1,25 @@
-### **Prebuilt Windows Releases**
-
-Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed.
diff --git a/doc/doc_prebuilt_windows_app.md b/doc/doc_prebuilt_windows_app.md
index 0c0d37a..5746f5a 100644
--- a/doc/doc_prebuilt_windows_app.md
+++ b/doc/doc_prebuilt_windows_app.md
@@ -1,25 +1,25 @@
-### **Prebuilt Windows Releases**
-
-Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed. Prebuilt DeepFaceLab, including GPU and CPU versions, can be downloaded from
-
-[Google drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci)
-
-[Mega](https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg)
-
-Available builds:
-
-* DeepFaceLabCUDA9.2SSE - for NVIDIA cards up to GTX1080 and any 64-bit CPU
-
-* DeepFaceLabCUDA10.1AVX - for NVIDIA cards up to RTX and CPU with AVX instructions support
-
-* DeepFaceLabOpenCLSSE - for AMD/IntelHD cards and any 64-bit CPU
-
-#### Video tutorials using prebuilt windows app
-
-* [Basic workflow](https://www.youtube.com/watch?v=K98nTNjXkq8)
-
-* [Basic workflow (thanks @derpfakes)](https://www.youtube.com/watch?v=cVcyghhmQSA)
-
-* [How To Make DeepFakes With DeepFaceLab - An Amatuer's Guide](https://www.youtube.com/watch?v=wBax7_UWXvc)
-
+### **Prebuilt Windows Releases**
+
+Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed. Prebuilt DeepFaceLab, including GPU and CPU versions, can be downloaded from
+
+[Google drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci)
+
+[Mega](https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg)
+
+Available builds:
+
+* DeepFaceLabCUDA9.2SSE - for NVIDIA cards up to GTX1080 and any 64-bit CPU
+
+* DeepFaceLabCUDA10.1AVX - for NVIDIA cards up to RTX and CPUs with AVX instruction support
+
+* DeepFaceLabOpenCLSSE - for AMD/IntelHD cards and any 64-bit CPU
+
+#### Video tutorials using the prebuilt Windows app
+
+* [Basic workflow](https://www.youtube.com/watch?v=K98nTNjXkq8)
+
+* [Basic workflow (thanks @derpfakes)](https://www.youtube.com/watch?v=cVcyghhmQSA)
+
+* [How To Make DeepFakes With DeepFaceLab - An Amatuer's Guide](https://www.youtube.com/watch?v=wBax7_UWXvc)
+
 * [Manual re-extract poorly aligned frames](https://www.youtube.com/watch?v=7z1ykVVCHhM)
\ No newline at end of file
diff --git a/doc/doc_ready_to_work_facesets.md b/doc/doc_ready_to_work_facesets.md
index 13ada80..fb3344f 100644
--- a/doc/doc_ready_to_work_facesets.md
+++ b/doc/doc_ready_to_work_facesets.md
@@ -1,11 +1,11 @@
-### **Example Face Sets**:
-
-Faces sets for the following have been pre-extracted,
-
-- Nicolas Cage
-- Steve Jobs
-- Putin
-- Elon Musk
-- Harrison Ford
-
-[Download from Mega](https://mega.nz/#F!y1ERHDaL!PPwg01PQZk0FhWLVo5_MaQ)
+### **Example Face Sets**
+
+Face sets for the following have been pre-extracted:
+
+- Nicolas Cage
+- Steve Jobs
+- Putin
+- Elon Musk
+- Harrison Ford
+
+[Download from Mega](https://mega.nz/#F!y1ERHDaL!PPwg01PQZk0FhWLVo5_MaQ)
diff --git a/doc/gallery/doc_gallery.md b/doc/gallery/doc_gallery.md
index aac2d37..5ba780f 100644
--- a/doc/gallery/doc_gallery.md
+++ b/doc/gallery/doc_gallery.md
@@ -1,3 +1,3 @@
-![](1.jpg)
-
+![](1.jpg)
+
 ![](2.jpg)
\ No newline at end of file
diff --git a/facelib/DLIBExtractor.py b/facelib/DLIBExtractor.py
index b8230ad..a91164d 100644
--- a/facelib/DLIBExtractor.py
+++ b/facelib/DLIBExtractor.py
@@ -1,40 +1,40 @@
-import numpy as np
-import os
-import cv2
-
-from pathlib import Path
-
-class DLIBExtractor(object):
-    def __init__(self, dlib):
-        self.scale_to = 1850
-        #3100 eats ~1.687GB VRAM on 2GB 730 desktop card, but >4Gb on 6GB card,
-        #but 3100 doesnt work on 2GB 850M notebook card, I cant understand this behaviour
-        #1850 works on 2GB 850M notebook card, works faster than 3100, produces good result
-        self.dlib = dlib
-
-    def __enter__(self):
-        self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") )
-        self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 )
-        return self
-
-    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
-        del self.dlib_cnn_face_detector
-        return False #pass exception between __enter__ and __exit__ to outter level
-
-    def extract_from_bgr (self, input_image):
-        input_image = input_image[:,:,::-1].copy()
-        (h, w, ch) = input_image.shape
-
-        detected_faces = []
-        input_scale = self.scale_to / (w if w > h else h)
-        input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR)
-        detected_faces = self.dlib_cnn_face_detector(input_image, 0)
-
-        result = []
-        for d_rect in detected_faces:
-            if type(d_rect) == self.dlib.mmod_rectangle:
-                d_rect = d_rect.rect
-            left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom()
-            result.append ( (int(left/input_scale), int(top/input_scale), int(right/input_scale), int(bottom/input_scale)) )
-
-        return result
+import numpy as np
+import os
+import cv2
+
+from pathlib import Path
+
+class DLIBExtractor(object):
+    def __init__(self, dlib):
+        self.scale_to = 1850
+        #3100 eats ~1.687GB VRAM on a 2GB 730 desktop card, but >4GB on a 6GB card;
+        #3100 doesn't work at all on a 2GB 850M notebook card, which I can't explain.
+        #1850 works on the 2GB 850M notebook card, runs faster than 3100 and produces good results.
+        self.dlib = dlib
+
+    def __enter__(self):
+        self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") )
+        self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 )
+        return self
+
+    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
+        del self.dlib_cnn_face_detector
+        return False #pass exception between __enter__ and __exit__ to outer level
+
+    def extract_from_bgr (self, input_image):
+        input_image = input_image[:,:,::-1].copy()
+        (h, w, ch) = input_image.shape
+
+        detected_faces = []
+        input_scale = self.scale_to / (w if w > h else h)
+        input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR)
+        detected_faces = self.dlib_cnn_face_detector(input_image, 0)
+
+        result = []
+        for d_rect in detected_faces:
+            if type(d_rect) == self.dlib.mmod_rectangle:
+                d_rect = d_rect.rect
+            left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom()
+            result.append ( (int(left/input_scale), int(top/input_scale), int(right/input_scale), int(bottom/input_scale)) )
+
+        return result
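extract_from_bgr above runs detection on a copy resized so that its longest side equals scale_to, then divides the detector's rectangle coordinates by the same factor to return boxes in original-image space. A short sketch of that round trip; detect_fn is a hypothetical stand-in so the example runs without dlib:

import cv2
import numpy as np

def detect_at_fixed_scale(image_bgr, detect_fn, scale_to=1850):
    # resize so the longest side equals scale_to, as DLIBExtractor does
    h, w = image_bgr.shape[:2]
    input_scale = scale_to / max(w, h)
    resized = cv2.resize(image_bgr, (int(w * input_scale), int(h * input_scale)),
                         interpolation=cv2.INTER_LINEAR)
    # detect_fn returns (left, top, right, bottom) boxes in resized coordinates;
    # dividing by input_scale maps them back to the original image
    return [tuple(int(v / input_scale) for v in box) for box in detect_fn(resized)]

# toy usage: a fake detector that "finds" one box covering the middle of the frame
fake = lambda img: [(img.shape[1] // 4, img.shape[0] // 4,
                     3 * img.shape[1] // 4, 3 * img.shape[0] // 4)]
print(detect_at_fixed_scale(np.zeros((720, 1280, 3), np.uint8), fake))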
diff --git a/facelib/FANSegmentator.py b/facelib/FANSegmentator.py
index d944aaa..e2ae705 100644
--- a/facelib/FANSegmentator.py
+++ b/facelib/FANSegmentator.py
@@ -1,139 +1,139 @@
-import os
-import pickle
-from functools import partial
-from pathlib import Path
-
-import cv2
-import numpy as np
-
-from interact import interact as io
-from nnlib import nnlib
-
-"""
-FANSegmentator is designed to exclude obstructions from faces such as hair, fingers, etc.
- -Dataset used to train located in official DFL mega.nz folder -https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg - -using https://github.com/ternaus/TernausNet -TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation -""" - -class FANSegmentator(object): - VERSION = 1 - def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - - self.model = FANSegmentator.BuildModel(resolution, ngf=64) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.weights_path = weights_file_root / ('FANSeg_%d_%s.h5' % (resolution, face_type_str) ) - - if load_weights: - self.model.load_weights (str(self.weights_path)) - else: - if training: - try: - with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: - d = pickle.loads (f.read()) - - for i in [0,3,6,8,11,13,16,18]: - s = 'features.%d' % i - - self.model.get_layer (s).set_weights ( d[s] ) - except: - io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") - - if training: - #self.model.compile(loss='mse', optimizer=Adam(tf_cpu_mode=2)) - self.model.compile(loss='binary_crossentropy', optimizer=Adam(tf_cpu_mode=2) ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.model.save_weights (str(self.weights_path)) - - def train_on_batch(self, inp, outp): - return self.model.train_on_batch(inp, outp) - - def extract (self, input_image, is_input_tanh=False): - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
- - result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) - result[result < 0.1] = 0 #get rid of noise - - if input_shape_len == 3: - result = result[0] - - return result - - @staticmethod - def BuildModel ( resolution, ngf=64, norm='', act='lrelu'): - exec( nnlib.import_all(), locals(), globals() ) - inp = Input ( (resolution,resolution,3) ) - x = inp - x = FANSegmentator.Flow(ngf=ngf, norm=norm, act=act)(x) - model = Model(inp,x) - return model - - @staticmethod - def Flow(ngf=64, num_downs=4, norm='', act='lrelu'): - exec( nnlib.import_all(), locals(), globals() ) - - def func(input): - x = input - - x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) - x = MaxPooling2D()(x) - - x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) - x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) - x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) - x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) - x = MaxPooling2D()(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same')(x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x4]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x3]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x2]) - x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x1]) - x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu') (x) - - x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu') (x) - x = Concatenate(axis=3)([ x, x0]) - x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu') (x) - - return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid')(x) - - - return func +import os +import pickle +from functools import partial +from pathlib import Path + +import cv2 +import numpy as np + +from interact import interact as io +from nnlib import nnlib + +""" +FANSegmentator is designed to exclude obstructions from faces such as hair, fingers, etc. 
+ +Dataset used to train located in official DFL mega.nz folder +https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg + +using https://github.com/ternaus/TernausNet +TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation +""" + +class FANSegmentator(object): + VERSION = 1 + def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): + exec( nnlib.import_all(), locals(), globals() ) + + self.model = FANSegmentator.BuildModel(resolution, ngf=64) + + if weights_file_root is not None: + weights_file_root = Path(weights_file_root) + else: + weights_file_root = Path(__file__).parent + + self.weights_path = weights_file_root / ('FANSeg_%d_%s.h5' % (resolution, face_type_str) ) + + if load_weights: + self.model.load_weights (str(self.weights_path)) + else: + if training: + try: + with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: + d = pickle.loads (f.read()) + + for i in [0,3,6,8,11,13,16,18]: + s = 'features.%d' % i + + self.model.get_layer (s).set_weights ( d[s] ) + except: + io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") + + if training: + #self.model.compile(loss='mse', optimizer=Adam(tf_cpu_mode=2)) + self.model.compile(loss='binary_crossentropy', optimizer=Adam(tf_cpu_mode=2) ) + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def save_weights(self): + self.model.save_weights (str(self.weights_path)) + + def train_on_batch(self, inp, outp): + return self.model.train_on_batch(inp, outp) + + def extract (self, input_image, is_input_tanh=False): + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] 
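+
+        # model.predict returns a per-pixel face-probability map in [0,1]; the
+        # clip and 0.1 threshold below squash numeric overshoot and drop
+        # low-confidence responses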
+ + result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) + result[result < 0.1] = 0 #get rid of noise + + if input_shape_len == 3: + result = result[0] + + return result + + @staticmethod + def BuildModel ( resolution, ngf=64, norm='', act='lrelu'): + exec( nnlib.import_all(), locals(), globals() ) + inp = Input ( (resolution,resolution,3) ) + x = inp + x = FANSegmentator.Flow(ngf=ngf, norm=norm, act=act)(x) + model = Model(inp,x) + return model + + @staticmethod + def Flow(ngf=64, num_downs=4, norm='', act='lrelu'): + exec( nnlib.import_all(), locals(), globals() ) + + def func(input): + x = input + + x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) + x = MaxPooling2D()(x) + + x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) + x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) + x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) + x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) + x = MaxPooling2D()(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same')(x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x4]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x3]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x2]) + x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x1]) + x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu') (x) + + x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu') (x) + x = Concatenate(axis=3)([ x, x0]) + x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu') (x) + + return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid')(x) + + + return func diff --git a/facelib/FaceType.py b/facelib/FaceType.py index 5cd1e8f..f0d5530 100644 --- a/facelib/FaceType.py +++ b/facelib/FaceType.py @@ -1,33 +1,33 @@ -from enum import IntEnum - -class FaceType(IntEnum): - HALF = 0, - FULL = 1, - HEAD = 2, - AVATAR = 3, #centered nose only - MARK_ONLY = 4, #no align at all, just embedded faceinfo - QTY = 5 - - @staticmethod - def fromString (s): - r = from_string_dict.get (s.lower()) - if r is None: - raise Exception ('FaceType.fromString value error') - return r - - @staticmethod - def toString (face_type): - return to_string_list[face_type] - -from_string_dict = {'half_face': FaceType.HALF, - 'full_face': FaceType.FULL, - 'head' : FaceType.HEAD, - 'avatar' : FaceType.AVATAR, - 'mark_only' : FaceType.MARK_ONLY, - } -to_string_list = [ 'half_face', - 'full_face', - 
'head', - 'avatar', - 'mark_only' - ] +from enum import IntEnum + +class FaceType(IntEnum): + HALF = 0, + FULL = 1, + HEAD = 2, + AVATAR = 3, #centered nose only + MARK_ONLY = 4, #no align at all, just embedded faceinfo + QTY = 5 + + @staticmethod + def fromString (s): + r = from_string_dict.get (s.lower()) + if r is None: + raise Exception ('FaceType.fromString value error') + return r + + @staticmethod + def toString (face_type): + return to_string_list[face_type] + +from_string_dict = {'half_face': FaceType.HALF, + 'full_face': FaceType.FULL, + 'head' : FaceType.HEAD, + 'avatar' : FaceType.AVATAR, + 'mark_only' : FaceType.MARK_ONLY, + } +to_string_list = [ 'half_face', + 'full_face', + 'head', + 'avatar', + 'mark_only' + ] diff --git a/facelib/LandmarksExtractor.py b/facelib/LandmarksExtractor.py index 1f37814..c9ca49d 100644 --- a/facelib/LandmarksExtractor.py +++ b/facelib/LandmarksExtractor.py @@ -1,120 +1,120 @@ -import traceback -import numpy as np -import os -import cv2 -from pathlib import Path -from facelib import FaceType -from facelib import LandmarksProcessor - -class LandmarksExtractor(object): - def __init__ (self, keras): - self.keras = keras - K = self.keras.backend - - def __enter__(self): - keras_model_path = Path(__file__).parent / "2DFAN-4.h5" - if not keras_model_path.exists(): - return None - - self.keras_model = self.keras.models.load_model (str(keras_model_path)) - - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - del self.keras_model - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, rects, second_pass_extractor=None, is_bgr=True): - if len(rects) == 0: - return [] - - if is_bgr: - input_image = input_image[:,:,::-1] - is_bgr = False - - (h, w, ch) = input_image.shape - - landmarks = [] - for (left, top, right, bottom) in rects: - try: - center = np.array( [ (left + right) / 2.0, (top + bottom) / 2.0] ) - scale = (right - left + bottom - top) / 195.0 - - image = self.crop(input_image, center, scale).astype(np.float32) - image = np.expand_dims(image, 0) - - predicted = self.keras_model.predict (image).transpose (0,3,1,2) - - pts_img = self.get_pts_from_predict ( predicted[-1], center, scale) - landmarks.append (pts_img) - except: - landmarks.append (None) - - if second_pass_extractor is not None: - for i in range(len(landmarks)): - try: - lmrks = landmarks[i] - if lmrks is None: - continue - - image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL) - face_image = cv2.warpAffine(input_image, image_to_face_mat, (256, 256), cv2.INTER_CUBIC ) - - rects2 = second_pass_extractor.extract(face_image, is_bgr=is_bgr) - if len(rects2) != 1: #dont do second pass if faces != 1 detected in cropped image - continue - - lmrks2 = self.extract (face_image, [ rects2[0] ], is_bgr=is_bgr)[0] - source_lmrks2 = LandmarksProcessor.transform_points (lmrks2, image_to_face_mat, True) - landmarks[i] = source_lmrks2 - except: - continue - - return landmarks - - def transform(self, point, center, scale, resolution): - pt = np.array ( [point[0], point[1], 1.0] ) - h = 200.0 * scale - m = np.eye(3) - m[0,0] = resolution / h - m[1,1] = resolution / h - m[0,2] = resolution * ( -center[0] / h + 0.5 ) - m[1,2] = resolution * ( -center[1] / h + 0.5 ) - m = np.linalg.inv(m) - return np.matmul (m, pt)[0:2] - - def crop(self, image, center, scale, resolution=256.0): - ul = self.transform([1, 1], center, scale, resolution).astype( np.int ) - br = self.transform([resolution, 
resolution], center, scale, resolution).astype( np.int ) - - if image.ndim > 2: - newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) - newImg = np.zeros(newDim, dtype=np.uint8) - else: - newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) - newImg = np.zeros(newDim, dtype=np.uint8) - ht = image.shape[0] - wd = image.shape[1] - newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) - newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) - oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) - oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) - newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] - - newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR) - return newImg - - def get_pts_from_predict(self, a, center, scale): - b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) ) - c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float) - c[:,0] %= a.shape[2] - c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] ) - - for i in range(a.shape[0]): - pX, pY = int(c[i,0]), int(c[i,1]) - if pX > 0 and pX < 63 and pY > 0 and pY < 63: - diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] ) - c[i] += np.sign(diff)*0.25 - - c += 0.5 - return np.array( [ self.transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ] ) +import traceback +import numpy as np +import os +import cv2 +from pathlib import Path +from facelib import FaceType +from facelib import LandmarksProcessor + +class LandmarksExtractor(object): + def __init__ (self, keras): + self.keras = keras + K = self.keras.backend + + def __enter__(self): + keras_model_path = Path(__file__).parent / "2DFAN-4.h5" + if not keras_model_path.exists(): + return None + + self.keras_model = self.keras.models.load_model (str(keras_model_path)) + + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + del self.keras_model + return False #pass exception between __enter__ and __exit__ to outter level + + def extract (self, input_image, rects, second_pass_extractor=None, is_bgr=True): + if len(rects) == 0: + return [] + + if is_bgr: + input_image = input_image[:,:,::-1] + is_bgr = False + + (h, w, ch) = input_image.shape + + landmarks = [] + for (left, top, right, bottom) in rects: + try: + center = np.array( [ (left + right) / 2.0, (top + bottom) / 2.0] ) + scale = (right - left + bottom - top) / 195.0 + + image = self.crop(input_image, center, scale).astype(np.float32) + image = np.expand_dims(image, 0) + + predicted = self.keras_model.predict (image).transpose (0,3,1,2) + + pts_img = self.get_pts_from_predict ( predicted[-1], center, scale) + landmarks.append (pts_img) + except: + landmarks.append (None) + + if second_pass_extractor is not None: + for i in range(len(landmarks)): + try: + lmrks = landmarks[i] + if lmrks is None: + continue + + image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL) + face_image = cv2.warpAffine(input_image, image_to_face_mat, (256, 256), cv2.INTER_CUBIC ) + + rects2 = second_pass_extractor.extract(face_image, is_bgr=is_bgr) + if len(rects2) != 1: #dont do second pass if faces != 1 detected in cropped image + continue + + lmrks2 = self.extract (face_image, [ rects2[0] ], is_bgr=is_bgr)[0] + source_lmrks2 = LandmarksProcessor.transform_points (lmrks2, image_to_face_mat, 
True) + landmarks[i] = source_lmrks2 + except: + continue + + return landmarks + + def transform(self, point, center, scale, resolution): + pt = np.array ( [point[0], point[1], 1.0] ) + h = 200.0 * scale + m = np.eye(3) + m[0,0] = resolution / h + m[1,1] = resolution / h + m[0,2] = resolution * ( -center[0] / h + 0.5 ) + m[1,2] = resolution * ( -center[1] / h + 0.5 ) + m = np.linalg.inv(m) + return np.matmul (m, pt)[0:2] + + def crop(self, image, center, scale, resolution=256.0): + ul = self.transform([1, 1], center, scale, resolution).astype( np.int ) + br = self.transform([resolution, resolution], center, scale, resolution).astype( np.int ) + + if image.ndim > 2: + newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) + newImg = np.zeros(newDim, dtype=np.uint8) + else: + newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) + newImg = np.zeros(newDim, dtype=np.uint8) + ht = image.shape[0] + wd = image.shape[1] + newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) + newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) + oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) + oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) + newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] + + newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR) + return newImg + + def get_pts_from_predict(self, a, center, scale): + b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) ) + c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float) + c[:,0] %= a.shape[2] + c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] ) + + for i in range(a.shape[0]): + pX, pY = int(c[i,0]), int(c[i,1]) + if pX > 0 and pX < 63 and pY > 0 and pY < 63: + diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] ) + c[i] += np.sign(diff)*0.25 + + c += 0.5 + return np.array( [ self.transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ] ) diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py index 06d895d..66c29dd 100644 --- a/facelib/LandmarksProcessor.py +++ b/facelib/LandmarksProcessor.py @@ -1,386 +1,386 @@ -import colorsys -import cv2 -import numpy as np -from enum import IntEnum -import mathlib -import imagelib -from imagelib import IEPolys -from mathlib.umeyama import umeyama -from facelib import FaceType -import math - -mean_face_x = np.array([ -0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, -0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, -0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, -0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, -0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, -0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, -0.553364, 0.490127, 0.42689 ]) - -mean_face_y = np.array([ -0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, -0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, -0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, -0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, -0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, -0.902192, 0.909281, 0.902192, 0.864805, 
0.784792, 0.778746, 0.785343, 0.778746, -0.784792, 0.824182, 0.831803, 0.824182 ]) - -landmarks_2D = np.stack( [ mean_face_x, mean_face_y ], axis=1 ) - -# 68 point landmark definitions -landmarks_68_pt = { "mouth": (48,68), - "right_eyebrow": (17, 22), - "left_eyebrow": (22, 27), - "right_eye": (36, 42), - "left_eye": (42, 48), - "nose": (27, 36), # missed one point - "jaw": (0, 17) } - - -landmarks_68_3D = np.array( [ -[-73.393523 , -29.801432 , 47.667532 ], -[-72.775014 , -10.949766 , 45.909403 ], -[-70.533638 , 7.929818 , 44.842580 ], -[-66.850058 , 26.074280 , 43.141114 ], -[-59.790187 , 42.564390 , 38.635298 ], -[-48.368973 , 56.481080 , 30.750622 ], -[-34.121101 , 67.246992 , 18.456453 ], -[-17.875411 , 75.056892 , 3.609035 ], -[0.098749 , 77.061286 , -0.881698 ], -[17.477031 , 74.758448 , 5.181201 ], -[32.648966 , 66.929021 , 19.176563 ], -[46.372358 , 56.311389 , 30.770570 ], -[57.343480 , 42.419126 , 37.628629 ], -[64.388482 , 25.455880 , 40.886309 ], -[68.212038 , 6.990805 , 42.281449 ], -[70.486405 , -11.666193 , 44.142567 ], -[71.375822 , -30.365191 , 47.140426 ], -[-61.119406 , -49.361602 , 14.254422 ], -[-51.287588 , -58.769795 , 7.268147 ], -[-37.804800 , -61.996155 , 0.442051 ], -[-24.022754 , -61.033399 , -6.606501 ], -[-11.635713 , -56.686759 , -11.967398 ], -[12.056636 , -57.391033 , -12.051204 ], -[25.106256 , -61.902186 , -7.315098 ], -[38.338588 , -62.777713 , -1.022953 ], -[51.191007 , -59.302347 , 5.349435 ], -[60.053851 , -50.190255 , 11.615746 ], -[0.653940 , -42.193790 , -13.380835 ], -[0.804809 , -30.993721 , -21.150853 ], -[0.992204 , -19.944596 , -29.284036 ], -[1.226783 , -8.414541 , -36.948060 ], -[-14.772472 , 2.598255 , -20.132003 ], -[-7.180239 , 4.751589 , -23.536684 ], -[0.555920 , 6.562900 , -25.944448 ], -[8.272499 , 4.661005 , -23.695741 ], -[15.214351 , 2.643046 , -20.858157 ], -[-46.047290 , -37.471411 , 7.037989 ], -[-37.674688 , -42.730510 , 3.021217 ], -[-27.883856 , -42.711517 , 1.353629 ], -[-19.648268 , -36.754742 , -0.111088 ], -[-28.272965 , -35.134493 , -0.147273 ], -[-38.082418 , -34.919043 , 1.476612 ], -[19.265868 , -37.032306 , -0.665746 ], -[27.894191 , -43.342445 , 0.247660 ], -[37.437529 , -43.110822 , 1.696435 ], -[45.170805 , -38.086515 , 4.894163 ], -[38.196454 , -35.532024 , 0.282961 ], -[28.764989 , -35.484289 , -1.172675 ], -[-28.916267 , 28.612716 , -2.240310 ], -[-17.533194 , 22.172187 , -15.934335 ], -[-6.684590 , 19.029051 , -22.611355 ], -[0.381001 , 20.721118 , -23.748437 ], -[8.375443 , 19.035460 , -22.721995 ], -[18.876618 , 22.394109 , -15.610679 ], -[28.794412 , 28.079924 , -3.217393 ], -[19.057574 , 36.298248 , -14.987997 ], -[8.956375 , 39.634575 , -22.554245 ], -[0.381549 , 40.395647 , -23.591626 ], -[-7.428895 , 39.836405 , -22.406106 ], -[-18.160634 , 36.677899 , -15.121907 ], -[-24.377490 , 28.677771 , -4.785684 ], -[-6.897633 , 25.475976 , -20.893742 ], -[0.340663 , 26.014269 , -22.220479 ], -[8.444722 , 25.326198 , -21.025520 ], -[24.474473 , 28.323008 , -5.712776 ], -[8.449166 , 30.596216 , -20.671489 ], -[0.205322 , 31.408738 , -21.903670 ], -[-7.198266 , 30.844876 , -20.328022 ] ], dtype=np.float32) - -def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): - if not isinstance(image_landmarks, np.ndarray): - image_landmarks = np.array (image_landmarks) - - if face_type == FaceType.AVATAR: - centroid = np.mean (image_landmarks, axis=0) - - mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] - a, c = mat[0,0], mat[1,0] - scale = math.sqrt((a * a) + (c * c)) - - padding = 
(output_size / 64) * 32 - - mat = np.eye ( 2,3 ) - mat[0,2] = -centroid[0] - mat[1,2] = -centroid[1] - mat = mat * scale * (output_size / 3) - mat[:,2] += output_size / 2 - else: - if face_type == FaceType.HALF: - padding = 0 - elif face_type == FaceType.FULL: - padding = (output_size / 64) * 12 - elif face_type == FaceType.HEAD: - padding = (output_size / 64) * 24 - else: - raise ValueError ('wrong face_type: ', face_type) - - mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] - mat = mat * (output_size - 2 * padding) - mat[:,2] += padding - mat *= (1 / scale) - mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 ) - - return mat - -def transform_points(points, mat, invert=False): - if invert: - mat = cv2.invertAffineTransform (mat) - points = np.expand_dims(points, axis=1) - points = cv2.transform(points, mat, points.shape) - points = np.squeeze(points) - return points - - -def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None): - if len(image_landmarks) != 68: - raise Exception('get_image_hull_mask works only with 68 landmarks') - int_lmrks = np.array(image_landmarks.copy(), dtype=np.int) - - hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[0:9], - # int_lmrks[17:18]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[8:17], - # int_lmrks[26:27]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[17:20], - # int_lmrks[8:9]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[24:27], - # int_lmrks[8:9]))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[19:25], - # int_lmrks[8:9], - # ))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[17:22], - # int_lmrks[27:28], - # int_lmrks[31:36], - # int_lmrks[8:9] - # ))) , (1,) ) - - # cv2.fillConvexPoly( hull_mask, cv2.convexHull( - # np.concatenate ( (int_lmrks[22:27], - # int_lmrks[27:28], - # int_lmrks[31:36], - # int_lmrks[8:9] - # ))) , (1,) ) - - # #nose - # cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) ) - ml_pnt = (int_lmrks[36] + int_lmrks[0]) // 2 - mr_pnt = (int_lmrks[16] + int_lmrks[45]) // 2 - - # mid points between the mid points and eye - ql_pnt = (int_lmrks[36] + ml_pnt) // 2 - qr_pnt = (int_lmrks[45] + mr_pnt) // 2 - - # Top of the eye arrays - bot_l = np.array((ql_pnt, int_lmrks[36], int_lmrks[37], int_lmrks[38], int_lmrks[39])) - bot_r = np.array((int_lmrks[42], int_lmrks[43], int_lmrks[44], int_lmrks[45], qr_pnt)) - - # Eyebrow arrays - top_l = int_lmrks[17:22] - top_r = int_lmrks[22:27] - - # Adjust eyebrow arrays - int_lmrks[17:22] = top_l + ((top_l - bot_l) // 2) - int_lmrks[22:27] = top_r + ((top_r - bot_r) // 2) - - r_jaw = (int_lmrks[0:9], int_lmrks[17:18]) - l_jaw = (int_lmrks[8:17], int_lmrks[26:27]) - r_cheek = (int_lmrks[17:20], int_lmrks[8:9]) - l_cheek = (int_lmrks[24:27], int_lmrks[8:9]) - nose_ridge = (int_lmrks[19:25], int_lmrks[8:9],) - r_eye = (int_lmrks[17:22], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) - l_eye = (int_lmrks[22:27], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) - nose = (int_lmrks[27:31], int_lmrks[31:36]) - parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] - - for item in parts: - merged = np.concatenate(item) - cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), 255.) 
# pylint: disable=no-member - - if ie_polys is not None: - ie_polys.overlay_mask(hull_mask) - - return hull_mask - -def get_image_eye_mask (image_shape, image_landmarks): - if len(image_landmarks) != 68: - raise Exception('get_image_eye_mask works only with 68 landmarks') - - hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) - - cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[36:42]), (1,) ) - cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[42:48]), (1,) ) - - return hull_mask - -def blur_image_hull_mask (hull_mask): - - maxregion = np.argwhere(hull_mask==1.0) - miny,minx = maxregion.min(axis=0)[:2] - maxy,maxx = maxregion.max(axis=0)[:2] - lenx = maxx - minx; - leny = maxy - miny; - masky = int(minx+(lenx//2)) - maskx = int(miny+(leny//2)) - lowest_len = min (lenx, leny) - ero = int( lowest_len * 0.085 ) - blur = int( lowest_len * 0.10 ) - - hull_mask = cv2.erode(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) - hull_mask = cv2.blur(hull_mask, (blur, blur) ) - hull_mask = np.expand_dims (hull_mask,-1) - - return hull_mask - -mirror_idxs = [ - [0,16], - [1,15], - [2,14], - [3,13], - [4,12], - [5,11], - [6,10], - [7,9], - - [17,26], - [18,25], - [19,24], - [20,23], - [21,22], - - [36,45], - [37,44], - [38,43], - [39,42], - [40,47], - [41,46], - - [31,35], - [32,34], - - [50,52], - [49,53], - [48,54], - [59,55], - [58,56], - [67,65], - [60,64], - [61,63] ] - -def mirror_landmarks (landmarks, val): - result = landmarks.copy() - - for idx in mirror_idxs: - result [ idx ] = result [ idx[::-1] ] - - result[:,0] = val - result[:,0] - 1 - return result - -def draw_landmarks (image, image_landmarks, color=(0,255,0), transparent_mask=False, ie_polys=None): - if len(image_landmarks) != 68: - raise Exception('get_image_eye_mask works only with 68 landmarks') - - int_lmrks = np.array(image_landmarks, dtype=np.int) - - jaw = int_lmrks[slice(*landmarks_68_pt["jaw"])] - right_eyebrow = int_lmrks[slice(*landmarks_68_pt["right_eyebrow"])] - left_eyebrow = int_lmrks[slice(*landmarks_68_pt["left_eyebrow"])] - mouth = int_lmrks[slice(*landmarks_68_pt["mouth"])] - right_eye = int_lmrks[slice(*landmarks_68_pt["right_eye"])] - left_eye = int_lmrks[slice(*landmarks_68_pt["left_eye"])] - nose = int_lmrks[slice(*landmarks_68_pt["nose"])] - - # open shapes - cv2.polylines(image, tuple(np.array([v]) for v in ( right_eyebrow, jaw, left_eyebrow, np.concatenate((nose, [nose[-6]])) )), - False, color, lineType=cv2.LINE_AA) - # closed shapes - cv2.polylines(image, tuple(np.array([v]) for v in (right_eye, left_eye, mouth)), - True, color, lineType=cv2.LINE_AA) - # the rest of the cicles - for x, y in np.concatenate((right_eyebrow, left_eyebrow, mouth, right_eye, left_eye, nose), axis=0): - cv2.circle(image, (x, y), 1, color, 1, lineType=cv2.LINE_AA) - # jaw big circles - for x, y in jaw: - cv2.circle(image, (x, y), 2, color, lineType=cv2.LINE_AA) - - if transparent_mask: - mask = get_image_hull_mask (image.shape, image_landmarks, ie_polys) - image[...] = ( image * (1-mask) + image * mask / 2 )[...] 
- -def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, transparent_mask=False, ie_polys=None, landmarks_color=(0,255,0) ): - draw_landmarks(image, image_landmarks, color=landmarks_color, transparent_mask=transparent_mask, ie_polys=ie_polys) - imagelib.draw_rect (image, rect, (255,0,0), 2 ) - - image_to_face_mat = get_transform_mat (image_landmarks, face_size, face_type) - points = transform_points ( [ (0,0), (0,face_size-1), (face_size-1, face_size-1), (face_size-1,0) ], image_to_face_mat, True) - imagelib.draw_polygon (image, points, (0,0,255), 2) - -def calc_face_pitch(landmarks): - if not isinstance(landmarks, np.ndarray): - landmarks = np.array (landmarks) - t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0 - b = landmarks[8][1] - return float(b-t) - -def calc_face_yaw(landmarks): - if not isinstance(landmarks, np.ndarray): - landmarks = np.array (landmarks) - l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0 - r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 - return float(r-l) - -#returns pitch,yaw,roll [-1...+1] -def estimate_pitch_yaw_roll(aligned_256px_landmarks): - shape = (256,256) - focal_length = shape[1] - camera_center = (shape[1] / 2, shape[0] / 2) - camera_matrix = np.array( - [[focal_length, 0, camera_center[0]], - [0, focal_length, camera_center[1]], - [0, 0, 1]], dtype=np.float32) - - (_, rotation_vector, translation_vector) = cv2.solvePnP( - landmarks_68_3D, - aligned_256px_landmarks.astype(np.float32), - camera_matrix, - np.zeros((4, 1)) ) - - pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) - pitch = np.clip ( pitch/1.30, -1.0, 1.0 ) - yaw = np.clip ( yaw / 1.11, -1.0, 1.0 ) - roll = np.clip ( roll/3.15, -1.0, 1.0 ) - return -pitch, yaw, roll +import colorsys +import cv2 +import numpy as np +from enum import IntEnum +import mathlib +import imagelib +from imagelib import IEPolys +from mathlib.umeyama import umeyama +from facelib import FaceType +import math + +mean_face_x = np.array([ +0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, +0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, +0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, +0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, +0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, +0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, +0.553364, 0.490127, 0.42689 ]) + +mean_face_y = np.array([ +0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, +0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, +0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, +0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, +0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, +0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, +0.784792, 0.824182, 0.831803, 0.824182 ]) + +landmarks_2D = np.stack( [ mean_face_x, mean_face_y ], axis=1 ) + +# 68 point landmark definitions +landmarks_68_pt = { "mouth": (48,68), + "right_eyebrow": (17, 22), + "left_eyebrow": (22, 27), + "right_eye": (36, 42), + "left_eye": (42, 48), + "nose": (27, 36), # missed one point + 
"jaw": (0, 17) } + + +landmarks_68_3D = np.array( [ +[-73.393523 , -29.801432 , 47.667532 ], +[-72.775014 , -10.949766 , 45.909403 ], +[-70.533638 , 7.929818 , 44.842580 ], +[-66.850058 , 26.074280 , 43.141114 ], +[-59.790187 , 42.564390 , 38.635298 ], +[-48.368973 , 56.481080 , 30.750622 ], +[-34.121101 , 67.246992 , 18.456453 ], +[-17.875411 , 75.056892 , 3.609035 ], +[0.098749 , 77.061286 , -0.881698 ], +[17.477031 , 74.758448 , 5.181201 ], +[32.648966 , 66.929021 , 19.176563 ], +[46.372358 , 56.311389 , 30.770570 ], +[57.343480 , 42.419126 , 37.628629 ], +[64.388482 , 25.455880 , 40.886309 ], +[68.212038 , 6.990805 , 42.281449 ], +[70.486405 , -11.666193 , 44.142567 ], +[71.375822 , -30.365191 , 47.140426 ], +[-61.119406 , -49.361602 , 14.254422 ], +[-51.287588 , -58.769795 , 7.268147 ], +[-37.804800 , -61.996155 , 0.442051 ], +[-24.022754 , -61.033399 , -6.606501 ], +[-11.635713 , -56.686759 , -11.967398 ], +[12.056636 , -57.391033 , -12.051204 ], +[25.106256 , -61.902186 , -7.315098 ], +[38.338588 , -62.777713 , -1.022953 ], +[51.191007 , -59.302347 , 5.349435 ], +[60.053851 , -50.190255 , 11.615746 ], +[0.653940 , -42.193790 , -13.380835 ], +[0.804809 , -30.993721 , -21.150853 ], +[0.992204 , -19.944596 , -29.284036 ], +[1.226783 , -8.414541 , -36.948060 ], +[-14.772472 , 2.598255 , -20.132003 ], +[-7.180239 , 4.751589 , -23.536684 ], +[0.555920 , 6.562900 , -25.944448 ], +[8.272499 , 4.661005 , -23.695741 ], +[15.214351 , 2.643046 , -20.858157 ], +[-46.047290 , -37.471411 , 7.037989 ], +[-37.674688 , -42.730510 , 3.021217 ], +[-27.883856 , -42.711517 , 1.353629 ], +[-19.648268 , -36.754742 , -0.111088 ], +[-28.272965 , -35.134493 , -0.147273 ], +[-38.082418 , -34.919043 , 1.476612 ], +[19.265868 , -37.032306 , -0.665746 ], +[27.894191 , -43.342445 , 0.247660 ], +[37.437529 , -43.110822 , 1.696435 ], +[45.170805 , -38.086515 , 4.894163 ], +[38.196454 , -35.532024 , 0.282961 ], +[28.764989 , -35.484289 , -1.172675 ], +[-28.916267 , 28.612716 , -2.240310 ], +[-17.533194 , 22.172187 , -15.934335 ], +[-6.684590 , 19.029051 , -22.611355 ], +[0.381001 , 20.721118 , -23.748437 ], +[8.375443 , 19.035460 , -22.721995 ], +[18.876618 , 22.394109 , -15.610679 ], +[28.794412 , 28.079924 , -3.217393 ], +[19.057574 , 36.298248 , -14.987997 ], +[8.956375 , 39.634575 , -22.554245 ], +[0.381549 , 40.395647 , -23.591626 ], +[-7.428895 , 39.836405 , -22.406106 ], +[-18.160634 , 36.677899 , -15.121907 ], +[-24.377490 , 28.677771 , -4.785684 ], +[-6.897633 , 25.475976 , -20.893742 ], +[0.340663 , 26.014269 , -22.220479 ], +[8.444722 , 25.326198 , -21.025520 ], +[24.474473 , 28.323008 , -5.712776 ], +[8.449166 , 30.596216 , -20.671489 ], +[0.205322 , 31.408738 , -21.903670 ], +[-7.198266 , 30.844876 , -20.328022 ] ], dtype=np.float32) + +def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): + if not isinstance(image_landmarks, np.ndarray): + image_landmarks = np.array (image_landmarks) + + if face_type == FaceType.AVATAR: + centroid = np.mean (image_landmarks, axis=0) + + mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] + a, c = mat[0,0], mat[1,0] + scale = math.sqrt((a * a) + (c * c)) + + padding = (output_size / 64) * 32 + + mat = np.eye ( 2,3 ) + mat[0,2] = -centroid[0] + mat[1,2] = -centroid[1] + mat = mat * scale * (output_size / 3) + mat[:,2] += output_size / 2 + else: + if face_type == FaceType.HALF: + padding = 0 + elif face_type == FaceType.FULL: + padding = (output_size / 64) * 12 + elif face_type == FaceType.HEAD: + padding = (output_size / 64) * 24 + else: + 
raise ValueError ('wrong face_type: ', face_type) + + mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] + mat = mat * (output_size - 2 * padding) + mat[:,2] += padding + mat *= (1 / scale) + mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 ) + + return mat + +def transform_points(points, mat, invert=False): + if invert: + mat = cv2.invertAffineTransform (mat) + points = np.expand_dims(points, axis=1) + points = cv2.transform(points, mat, points.shape) + points = np.squeeze(points) + return points + + +def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None): + if len(image_landmarks) != 68: + raise Exception('get_image_hull_mask works only with 68 landmarks') + int_lmrks = np.array(image_landmarks.copy(), dtype=np.int) + + hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[0:9], + # int_lmrks[17:18]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[8:17], + # int_lmrks[26:27]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[17:20], + # int_lmrks[8:9]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[24:27], + # int_lmrks[8:9]))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[19:25], + # int_lmrks[8:9], + # ))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[17:22], + # int_lmrks[27:28], + # int_lmrks[31:36], + # int_lmrks[8:9] + # ))) , (1,) ) + + # cv2.fillConvexPoly( hull_mask, cv2.convexHull( + # np.concatenate ( (int_lmrks[22:27], + # int_lmrks[27:28], + # int_lmrks[31:36], + # int_lmrks[8:9] + # ))) , (1,) ) + + # #nose + # cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) ) + ml_pnt = (int_lmrks[36] + int_lmrks[0]) // 2 + mr_pnt = (int_lmrks[16] + int_lmrks[45]) // 2 + + # mid points between the mid points and eye + ql_pnt = (int_lmrks[36] + ml_pnt) // 2 + qr_pnt = (int_lmrks[45] + mr_pnt) // 2 + + # Top of the eye arrays + bot_l = np.array((ql_pnt, int_lmrks[36], int_lmrks[37], int_lmrks[38], int_lmrks[39])) + bot_r = np.array((int_lmrks[42], int_lmrks[43], int_lmrks[44], int_lmrks[45], qr_pnt)) + + # Eyebrow arrays + top_l = int_lmrks[17:22] + top_r = int_lmrks[22:27] + + # Adjust eyebrow arrays + int_lmrks[17:22] = top_l + ((top_l - bot_l) // 2) + int_lmrks[22:27] = top_r + ((top_r - bot_r) // 2) + + r_jaw = (int_lmrks[0:9], int_lmrks[17:18]) + l_jaw = (int_lmrks[8:17], int_lmrks[26:27]) + r_cheek = (int_lmrks[17:20], int_lmrks[8:9]) + l_cheek = (int_lmrks[24:27], int_lmrks[8:9]) + nose_ridge = (int_lmrks[19:25], int_lmrks[8:9],) + r_eye = (int_lmrks[17:22], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) + l_eye = (int_lmrks[22:27], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9]) + nose = (int_lmrks[27:31], int_lmrks[31:36]) + parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] + + for item in parts: + merged = np.concatenate(item) + cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), 255.) 
# pylint: disable=no-member + + if ie_polys is not None: + ie_polys.overlay_mask(hull_mask) + + return hull_mask + +def get_image_eye_mask (image_shape, image_landmarks): + if len(image_landmarks) != 68: + raise Exception('get_image_eye_mask works only with 68 landmarks') + + hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32) + + cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[36:42]), (1,) ) + cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[42:48]), (1,) ) + + return hull_mask + +def blur_image_hull_mask (hull_mask): + + maxregion = np.argwhere(hull_mask==1.0) + miny,minx = maxregion.min(axis=0)[:2] + maxy,maxx = maxregion.max(axis=0)[:2] + lenx = maxx - minx; + leny = maxy - miny; + masky = int(minx+(lenx//2)) + maskx = int(miny+(leny//2)) + lowest_len = min (lenx, leny) + ero = int( lowest_len * 0.085 ) + blur = int( lowest_len * 0.10 ) + + hull_mask = cv2.erode(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) + hull_mask = cv2.blur(hull_mask, (blur, blur) ) + hull_mask = np.expand_dims (hull_mask,-1) + + return hull_mask + +mirror_idxs = [ + [0,16], + [1,15], + [2,14], + [3,13], + [4,12], + [5,11], + [6,10], + [7,9], + + [17,26], + [18,25], + [19,24], + [20,23], + [21,22], + + [36,45], + [37,44], + [38,43], + [39,42], + [40,47], + [41,46], + + [31,35], + [32,34], + + [50,52], + [49,53], + [48,54], + [59,55], + [58,56], + [67,65], + [60,64], + [61,63] ] + +def mirror_landmarks (landmarks, val): + result = landmarks.copy() + + for idx in mirror_idxs: + result [ idx ] = result [ idx[::-1] ] + + result[:,0] = val - result[:,0] - 1 + return result + +def draw_landmarks (image, image_landmarks, color=(0,255,0), transparent_mask=False, ie_polys=None): + if len(image_landmarks) != 68: + raise Exception('get_image_eye_mask works only with 68 landmarks') + + int_lmrks = np.array(image_landmarks, dtype=np.int) + + jaw = int_lmrks[slice(*landmarks_68_pt["jaw"])] + right_eyebrow = int_lmrks[slice(*landmarks_68_pt["right_eyebrow"])] + left_eyebrow = int_lmrks[slice(*landmarks_68_pt["left_eyebrow"])] + mouth = int_lmrks[slice(*landmarks_68_pt["mouth"])] + right_eye = int_lmrks[slice(*landmarks_68_pt["right_eye"])] + left_eye = int_lmrks[slice(*landmarks_68_pt["left_eye"])] + nose = int_lmrks[slice(*landmarks_68_pt["nose"])] + + # open shapes + cv2.polylines(image, tuple(np.array([v]) for v in ( right_eyebrow, jaw, left_eyebrow, np.concatenate((nose, [nose[-6]])) )), + False, color, lineType=cv2.LINE_AA) + # closed shapes + cv2.polylines(image, tuple(np.array([v]) for v in (right_eye, left_eye, mouth)), + True, color, lineType=cv2.LINE_AA) + # the rest of the cicles + for x, y in np.concatenate((right_eyebrow, left_eyebrow, mouth, right_eye, left_eye, nose), axis=0): + cv2.circle(image, (x, y), 1, color, 1, lineType=cv2.LINE_AA) + # jaw big circles + for x, y in jaw: + cv2.circle(image, (x, y), 2, color, lineType=cv2.LINE_AA) + + if transparent_mask: + mask = get_image_hull_mask (image.shape, image_landmarks, ie_polys) + image[...] = ( image * (1-mask) + image * mask / 2 )[...] 
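+
+# Notes on get_image_hull_mask above: the brow landmarks are pushed away from
+# the eyes by half of the brow-to-eye distance before the region hulls are
+# filled, which is what stretches the mask up over the forehead. Also, the
+# regions are filled with 255.0 into a float32 mask, while blur_image_hull_mask
+# still tests for pixels equal to 1.0, so normalize the mask to [0,1] first if
+# the two are combined.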
+ +def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, transparent_mask=False, ie_polys=None, landmarks_color=(0,255,0) ): + draw_landmarks(image, image_landmarks, color=landmarks_color, transparent_mask=transparent_mask, ie_polys=ie_polys) + imagelib.draw_rect (image, rect, (255,0,0), 2 ) + + image_to_face_mat = get_transform_mat (image_landmarks, face_size, face_type) + points = transform_points ( [ (0,0), (0,face_size-1), (face_size-1, face_size-1), (face_size-1,0) ], image_to_face_mat, True) + imagelib.draw_polygon (image, points, (0,0,255), 2) + +def calc_face_pitch(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0 + b = landmarks[8][1] + return float(b-t) + +def calc_face_yaw(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0 + r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 + return float(r-l) + +#returns pitch,yaw,roll [-1...+1] +def estimate_pitch_yaw_roll(aligned_256px_landmarks): + shape = (256,256) + focal_length = shape[1] + camera_center = (shape[1] / 2, shape[0] / 2) + camera_matrix = np.array( + [[focal_length, 0, camera_center[0]], + [0, focal_length, camera_center[1]], + [0, 0, 1]], dtype=np.float32) + + (_, rotation_vector, translation_vector) = cv2.solvePnP( + landmarks_68_3D, + aligned_256px_landmarks.astype(np.float32), + camera_matrix, + np.zeros((4, 1)) ) + + pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) + pitch = np.clip ( pitch/1.30, -1.0, 1.0 ) + yaw = np.clip ( yaw / 1.11, -1.0, 1.0 ) + roll = np.clip ( roll/3.15, -1.0, 1.0 ) + return -pitch, yaw, roll diff --git a/facelib/MTCExtractor.py b/facelib/MTCExtractor.py index 056e8ad..c524ab9 100644 --- a/facelib/MTCExtractor.py +++ b/facelib/MTCExtractor.py @@ -1,350 +1,350 @@ -import numpy as np -import os -import cv2 - -from pathlib import Path -from nnlib import nnlib - -class MTCExtractor(object): - def __init__(self): - self.scale_to = 1920 - - self.min_face_size = self.scale_to * 0.042 - self.thresh1 = 0.7 - self.thresh2 = 0.85 - self.thresh3 = 0.6 - self.scale_factor = 0.95 - - exec( nnlib.import_all(), locals(), globals() ) - PNet_Input = Input ( (None, None,3) ) - x = PNet_Input - x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="PReLU1" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="PReLU2" )(x) - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="PReLU3" )(x) - prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x) - prob = Softmax()(prob) - x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x) - - PNet_model = Model(PNet_Input, [x,prob] ) - PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() ) - - RNet_Input = Input ( (24, 24, 3) ) - x = RNet_Input - x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = 
MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (128, name='conv4')(x) - x = PReLU (name="prelu4" )(x) - prob = Dense (2, name='conv51')(x) - prob = Softmax()(prob) - x = Dense (4, name='conv52')(x) - RNet_model = Model(RNet_Input, [x,prob] ) - RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() ) - - ONet_Input = Input ( (48, 48, 3) ) - x = ONet_Input - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x) - x = PReLU (shared_axes=[1,2], name="prelu4" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (256, name='conv5')(x) - x = PReLU (name="prelu5" )(x) - prob = Dense (2, name='conv61')(x) - prob = Softmax()(prob) - x1 = Dense (4, name='conv62')(x) - x2 = Dense (10, name='conv63')(x) - ONet_model = Model(ONet_Input, [x1,x2,prob] ) - ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() ) - - self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs ) - self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs ) - self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs ) - - def __enter__(self): - faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, is_bgr=True): - - if is_bgr: - input_image = input_image[:,:,::-1].copy() - is_bgr = False - - (h, w, ch) = input_image.shape - - input_scale = self.scale_to / max(w,h) - input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) - - detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ] - - return detected_faces - -def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): - """Detects faces in an image, and returns bounding boxes and 
points for them. - img: input image - minsize: minimum faces' size - pnet, rnet, onet: caffemodel - threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold - factor: the factor used to create a scaling pyramid of face sizes to detect in the image. - """ - factor_count=0 - total_boxes=np.empty((0,9)) - points=np.empty(0) - h=img.shape[0] - w=img.shape[1] - minl=np.amin([h, w]) - m=12.0/minsize - minl=minl*m - # create scale pyramid - scales=[] - while minl>=12: - scales += [m*np.power(factor, factor_count)] - minl = minl*factor - factor_count += 1 - # first stage - for scale in scales: - hs=int(np.ceil(h*scale)) - ws=int(np.ceil(w*scale)) - #print ('scale %f %d %d' % (scale, ws,hs)) - im_data = imresample(img, (hs, ws)) - im_data = (im_data-127.5)*0.0078125 - img_x = np.expand_dims(im_data, 0) - img_y = np.transpose(img_x, (0,2,1,3)) - out = pnet([img_y]) - out0 = np.transpose(out[0], (0,2,1,3)) - out1 = np.transpose(out[1], (0,2,1,3)) - - boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) - - # inter-scale nms - pick = nms(boxes.copy(), 0.5, 'Union') - if boxes.size>0 and pick.size>0: - boxes = boxes[pick,:] - total_boxes = np.append(total_boxes, boxes, axis=0) - - numbox = total_boxes.shape[0] - if numbox>0: - pick = nms(total_boxes.copy(), 0.7, 'Union') - total_boxes = total_boxes[pick,:] - regw = total_boxes[:,2]-total_boxes[:,0] - regh = total_boxes[:,3]-total_boxes[:,1] - qq1 = total_boxes[:,0]+total_boxes[:,5]*regw - qq2 = total_boxes[:,1]+total_boxes[:,6]*regh - qq3 = total_boxes[:,2]+total_boxes[:,7]*regw - qq4 = total_boxes[:,3]+total_boxes[:,8]*regh - total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) - total_boxes = rerec(total_boxes.copy()) - total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - - numbox = total_boxes.shape[0] - if numbox>0: - # second stage - tempimg = np.zeros((24,24,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (24, 24)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = rnet([tempimg1]) - out0 = np.transpose(out[0]) - out1 = np.transpose(out[1]) - score = out1[1,:] - ipass = np.where(score>threshold[1]) - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - if total_boxes.shape[0]>0: - pick = nms(total_boxes, 0.7, 'Union') - total_boxes = total_boxes[pick,:] - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) - total_boxes = rerec(total_boxes.copy()) - - numbox = total_boxes.shape[0] - if numbox>0: - # third stage - total_boxes = np.fix(total_boxes).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - tempimg = np.zeros((48,48,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (48, 48)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = onet([tempimg1]) - out0 = 
np.transpose(out[0]) - out1 = np.transpose(out[1]) - out2 = np.transpose(out[2]) - score = out2[1,:] - points = out1 - ipass = np.where(score>threshold[2]) - points = points[:,ipass[0]] - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - - w = total_boxes[:,2]-total_boxes[:,0]+1 - h = total_boxes[:,3]-total_boxes[:,1]+1 - points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 - points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 - if total_boxes.shape[0]>0: - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) - pick = nms(total_boxes.copy(), 0.7, 'Min') - total_boxes = total_boxes[pick,:] - points = points[:,pick] - - return total_boxes, points - - -# function [boundingbox] = bbreg(boundingbox,reg) -def bbreg(boundingbox,reg): - """Calibrate bounding boxes""" - if reg.shape[1]==1: - reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) - - w = boundingbox[:,2]-boundingbox[:,0]+1 - h = boundingbox[:,3]-boundingbox[:,1]+1 - b1 = boundingbox[:,0]+reg[:,0]*w - b2 = boundingbox[:,1]+reg[:,1]*h - b3 = boundingbox[:,2]+reg[:,2]*w - b4 = boundingbox[:,3]+reg[:,3]*h - boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) - return boundingbox - -def generateBoundingBox(imap, reg, scale, t): - """Use heatmap to generate bounding boxes""" - stride=2 - cellsize=12 - - imap = np.transpose(imap) - dx1 = np.transpose(reg[:,:,0]) - dy1 = np.transpose(reg[:,:,1]) - dx2 = np.transpose(reg[:,:,2]) - dy2 = np.transpose(reg[:,:,3]) - y, x = np.where(imap >= t) - if y.shape[0]==1: - dx1 = np.flipud(dx1) - dy1 = np.flipud(dy1) - dx2 = np.flipud(dx2) - dy2 = np.flipud(dy2) - score = imap[(y,x)] - reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) - if reg.size==0: - reg = np.empty((0,3)) - bb = np.transpose(np.vstack([y,x])) - q1 = np.fix((stride*bb+1)/scale) - q2 = np.fix((stride*bb+cellsize-1+1)/scale) - boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) - return boundingbox, reg - -# function pick = nms(boxes,threshold,type) -def nms(boxes, threshold, method): - if boxes.size==0: - return np.empty((0,3)) - x1 = boxes[:,0] - y1 = boxes[:,1] - x2 = boxes[:,2] - y2 = boxes[:,3] - s = boxes[:,4] - area = (x2-x1+1) * (y2-y1+1) - I = np.argsort(s) - pick = np.zeros_like(s, dtype=np.int16) - counter = 0 - while I.size>0: - i = I[-1] - pick[counter] = i - counter += 1 - idx = I[0:-1] - xx1 = np.maximum(x1[i], x1[idx]) - yy1 = np.maximum(y1[i], y1[idx]) - xx2 = np.minimum(x2[i], x2[idx]) - yy2 = np.minimum(y2[i], y2[idx]) - w = np.maximum(0.0, xx2-xx1+1) - h = np.maximum(0.0, yy2-yy1+1) - inter = w * h - if method == 'Min': - o = inter / np.minimum(area[i], area[idx]) - else: - o = inter / (area[i] + area[idx] - inter) - I = I[np.where(o<=threshold)] - pick = pick[0:counter] - return pick - -# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) -def pad(total_boxes, w, h): - """Compute the padding coordinates (pad the bounding boxes to square)""" - tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) - tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) - numbox = total_boxes.shape[0] - - dx = np.ones((numbox), dtype=np.int32) - dy = np.ones((numbox), dtype=np.int32) - edx = tmpw.copy().astype(np.int32) - edy = tmph.copy().astype(np.int32) - - x = total_boxes[:,0].copy().astype(np.int32) - y = total_boxes[:,1].copy().astype(np.int32) - ex = total_boxes[:,2].copy().astype(np.int32) - ey = 
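
Worked example for the greedy nms() above, with illustrative numbers (not from the repo): boxes are rows of [x1, y1, x2, y2, score], and 'Union' mode suppresses by IoU while 'Min' divides the intersection by the smaller area.

import numpy as np

boxes = np.array([[10., 10., 50., 50., 0.9],
                  [12., 12., 52., 52., 0.8],      # IoU with box 0 ~ 0.83 -> suppressed
                  [100., 100., 140., 140., 0.7]])
# intersection of boxes 0 and 1: (50-12+1)**2 = 1521
# union: 2 * 41 * 41 - 1521 = 1841, so IoU = 1521 / 1841 ~ 0.83 > 0.5
keep = nms(boxes, 0.5, 'Union')   # assuming nms from this file is in scope
# keep -> array([0, 2]): the highest-scoring box survives, its near-duplicate does not
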
total_boxes[:,3].copy().astype(np.int32) - - tmp = np.where(ex>w) - edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) - ex[tmp] = w - - tmp = np.where(ey>h) - edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) - ey[tmp] = h - - tmp = np.where(x<1) - dx.flat[tmp] = np.expand_dims(2-x[tmp],1) - x[tmp] = 1 - - tmp = np.where(y<1) - dy.flat[tmp] = np.expand_dims(2-y[tmp],1) - y[tmp] = 1 - - return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph - -# function [bboxA] = rerec(bboxA) -def rerec(bboxA): - """Convert bboxA to square.""" - h = bboxA[:,3]-bboxA[:,1] - w = bboxA[:,2]-bboxA[:,0] - l = np.maximum(w, h) - bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 - bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 - bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) - return bboxA - -def imresample(img, sz): - im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable - return im_data +import numpy as np +import os +import cv2 + +from pathlib import Path +from nnlib import nnlib + +class MTCExtractor(object): + def __init__(self): + self.scale_to = 1920 + + self.min_face_size = self.scale_to * 0.042 + self.thresh1 = 0.7 + self.thresh2 = 0.85 + self.thresh3 = 0.6 + self.scale_factor = 0.95 + + exec( nnlib.import_all(), locals(), globals() ) + PNet_Input = Input ( (None, None,3) ) + x = PNet_Input + x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) + x = PReLU (shared_axes=[1,2], name="PReLU1" )(x) + x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) + x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) + x = PReLU (shared_axes=[1,2], name="PReLU2" )(x) + x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) + x = PReLU (shared_axes=[1,2], name="PReLU3" )(x) + prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x) + prob = Softmax()(prob) + x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x) + + PNet_model = Model(PNet_Input, [x,prob] ) + PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() ) + + RNet_Input = Input ( (24, 24, 3) ) + x = RNet_Input + x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) + x = PReLU (shared_axes=[1,2], name="prelu1" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) + x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) + x = PReLU (shared_axes=[1,2], name="prelu2" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) + x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x) + x = PReLU (shared_axes=[1,2], name="prelu3" )(x) + x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) + x = Dense (128, name='conv4')(x) + x = PReLU (name="prelu4" )(x) + prob = Dense (2, name='conv51')(x) + prob = Softmax()(prob) + x = Dense (4, name='conv52')(x) + RNet_model = Model(RNet_Input, [x,prob] ) + RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() ) + + ONet_Input = Input ( (48, 48, 3) ) + x = ONet_Input + x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) + x = PReLU (shared_axes=[1,2], name="prelu1" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) + x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) + x = PReLU 
(shared_axes=[1,2], name="prelu2" )(x) + x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) + x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) + x = PReLU (shared_axes=[1,2], name="prelu3" )(x) + x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) + x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x) + x = PReLU (shared_axes=[1,2], name="prelu4" )(x) + x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) + x = Dense (256, name='conv5')(x) + x = PReLU (name="prelu5" )(x) + prob = Dense (2, name='conv61')(x) + prob = Softmax()(prob) + x1 = Dense (4, name='conv62')(x) + x2 = Dense (10, name='conv63')(x) + ONet_model = Model(ONet_Input, [x1,x2,prob] ) + ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() ) + + self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs ) + self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs ) + self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs ) + + def __enter__(self): + faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) + + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def extract (self, input_image, is_bgr=True): + + if is_bgr: + input_image = input_image[:,:,::-1].copy() + is_bgr = False + + (h, w, ch) = input_image.shape + + input_scale = self.scale_to / max(w,h) + input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) + + detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) + detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ] + + return detected_faces + +def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): + """Detects faces in an image, and returns bounding boxes and points for them. + img: input image + minsize: minimum faces' size + pnet, rnet, onet: caffemodel + threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold + factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
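
To make the scale pyramid concrete (an illustrative sketch mirroring the loop at the top of detect_face): each scale shrinks the image so that faces of minsize map onto PNet's 12x12 receptive field; the 0.0078125 normalization applied later is exactly 1/128, mapping 8-bit pixels to roughly [-1, 1].

import numpy as np

def scale_pyramid(h, w, minsize, factor):
    # same arithmetic as detect_face: start from 12/minsize and decay by factor
    m = 12.0 / minsize
    minl = min(h, w) * m
    scales = []
    while minl >= 12:
        scales.append(m * factor ** len(scales))
        minl *= factor
    return scales

# e.g. MTCExtractor's defaults after rescaling the long side to 1920 px:
print(scale_pyramid(1080, 1920, 1920 * 0.042, 0.95)[:3])
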
+ """ + factor_count=0 + total_boxes=np.empty((0,9)) + points=np.empty(0) + h=img.shape[0] + w=img.shape[1] + minl=np.amin([h, w]) + m=12.0/minsize + minl=minl*m + # create scale pyramid + scales=[] + while minl>=12: + scales += [m*np.power(factor, factor_count)] + minl = minl*factor + factor_count += 1 + # first stage + for scale in scales: + hs=int(np.ceil(h*scale)) + ws=int(np.ceil(w*scale)) + #print ('scale %f %d %d' % (scale, ws,hs)) + im_data = imresample(img, (hs, ws)) + im_data = (im_data-127.5)*0.0078125 + img_x = np.expand_dims(im_data, 0) + img_y = np.transpose(img_x, (0,2,1,3)) + out = pnet([img_y]) + out0 = np.transpose(out[0], (0,2,1,3)) + out1 = np.transpose(out[1], (0,2,1,3)) + + boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) + + # inter-scale nms + pick = nms(boxes.copy(), 0.5, 'Union') + if boxes.size>0 and pick.size>0: + boxes = boxes[pick,:] + total_boxes = np.append(total_boxes, boxes, axis=0) + + numbox = total_boxes.shape[0] + if numbox>0: + pick = nms(total_boxes.copy(), 0.7, 'Union') + total_boxes = total_boxes[pick,:] + regw = total_boxes[:,2]-total_boxes[:,0] + regh = total_boxes[:,3]-total_boxes[:,1] + qq1 = total_boxes[:,0]+total_boxes[:,5]*regw + qq2 = total_boxes[:,1]+total_boxes[:,6]*regh + qq3 = total_boxes[:,2]+total_boxes[:,7]*regw + qq4 = total_boxes[:,3]+total_boxes[:,8]*regh + total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) + total_boxes = rerec(total_boxes.copy()) + total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + + numbox = total_boxes.shape[0] + if numbox>0: + # second stage + tempimg = np.zeros((24,24,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (24, 24)) + else: + return np.empty() + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = rnet([tempimg1]) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + score = out1[1,:] + ipass = np.where(score>threshold[1]) + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + if total_boxes.shape[0]>0: + pick = nms(total_boxes, 0.7, 'Union') + total_boxes = total_boxes[pick,:] + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) + total_boxes = rerec(total_boxes.copy()) + + numbox = total_boxes.shape[0] + if numbox>0: + # third stage + total_boxes = np.fix(total_boxes).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + tempimg = np.zeros((48,48,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (48, 48)) + else: + return np.empty() + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = onet([tempimg1]) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + score = out2[1,:] + points = out1 + ipass = np.where(score>threshold[2]) + points = points[:,ipass[0]] + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), 
np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + + w = total_boxes[:,2]-total_boxes[:,0]+1 + h = total_boxes[:,3]-total_boxes[:,1]+1 + points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 + points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 + if total_boxes.shape[0]>0: + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) + pick = nms(total_boxes.copy(), 0.7, 'Min') + total_boxes = total_boxes[pick,:] + points = points[:,pick] + + return total_boxes, points + + +# function [boundingbox] = bbreg(boundingbox,reg) +def bbreg(boundingbox,reg): + """Calibrate bounding boxes""" + if reg.shape[1]==1: + reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) + + w = boundingbox[:,2]-boundingbox[:,0]+1 + h = boundingbox[:,3]-boundingbox[:,1]+1 + b1 = boundingbox[:,0]+reg[:,0]*w + b2 = boundingbox[:,1]+reg[:,1]*h + b3 = boundingbox[:,2]+reg[:,2]*w + b4 = boundingbox[:,3]+reg[:,3]*h + boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) + return boundingbox + +def generateBoundingBox(imap, reg, scale, t): + """Use heatmap to generate bounding boxes""" + stride=2 + cellsize=12 + + imap = np.transpose(imap) + dx1 = np.transpose(reg[:,:,0]) + dy1 = np.transpose(reg[:,:,1]) + dx2 = np.transpose(reg[:,:,2]) + dy2 = np.transpose(reg[:,:,3]) + y, x = np.where(imap >= t) + if y.shape[0]==1: + dx1 = np.flipud(dx1) + dy1 = np.flipud(dy1) + dx2 = np.flipud(dx2) + dy2 = np.flipud(dy2) + score = imap[(y,x)] + reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) + if reg.size==0: + reg = np.empty((0,3)) + bb = np.transpose(np.vstack([y,x])) + q1 = np.fix((stride*bb+1)/scale) + q2 = np.fix((stride*bb+cellsize-1+1)/scale) + boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) + return boundingbox, reg + +# function pick = nms(boxes,threshold,type) +def nms(boxes, threshold, method): + if boxes.size==0: + return np.empty((0,3)) + x1 = boxes[:,0] + y1 = boxes[:,1] + x2 = boxes[:,2] + y2 = boxes[:,3] + s = boxes[:,4] + area = (x2-x1+1) * (y2-y1+1) + I = np.argsort(s) + pick = np.zeros_like(s, dtype=np.int16) + counter = 0 + while I.size>0: + i = I[-1] + pick[counter] = i + counter += 1 + idx = I[0:-1] + xx1 = np.maximum(x1[i], x1[idx]) + yy1 = np.maximum(y1[i], y1[idx]) + xx2 = np.minimum(x2[i], x2[idx]) + yy2 = np.minimum(y2[i], y2[idx]) + w = np.maximum(0.0, xx2-xx1+1) + h = np.maximum(0.0, yy2-yy1+1) + inter = w * h + if method == 'Min': + o = inter / np.minimum(area[i], area[idx]) + else: + o = inter / (area[i] + area[idx] - inter) + I = I[np.where(o<=threshold)] + pick = pick[0:counter] + return pick + +# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) +def pad(total_boxes, w, h): + """Compute the padding coordinates (pad the bounding boxes to square)""" + tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) + tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) + numbox = total_boxes.shape[0] + + dx = np.ones((numbox), dtype=np.int32) + dy = np.ones((numbox), dtype=np.int32) + edx = tmpw.copy().astype(np.int32) + edy = tmph.copy().astype(np.int32) + + x = total_boxes[:,0].copy().astype(np.int32) + y = total_boxes[:,1].copy().astype(np.int32) + ex = total_boxes[:,2].copy().astype(np.int32) + ey = total_boxes[:,3].copy().astype(np.int32) + + tmp = np.where(ex>w) + edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) + ex[tmp] = w + + tmp = np.where(ey>h) + edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) + ey[tmp] = h + + tmp = 
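
A quick check of the heatmap-to-image mapping in generateBoundingBox above, using hypothetical cell indices: with stride 2 and cellsize 12, heatmap cell (y, x) found at pyramid scale s corresponds to a 12x12 window in the rescaled image, divided back by s to land in original coordinates.

import numpy as np

y, x, scale = 7, 3, 0.25
bb = np.array([y, x])
q1 = np.fix((2 * bb + 1) / scale)    # top-left     -> [ 60.  28.]
q2 = np.fix((2 * bb + 12) / scale)   # bottom-right -> [104.  72.]
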
np.where(x<1) + dx.flat[tmp] = np.expand_dims(2-x[tmp],1) + x[tmp] = 1 + + tmp = np.where(y<1) + dy.flat[tmp] = np.expand_dims(2-y[tmp],1) + y[tmp] = 1 + + return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph + +# function [bboxA] = rerec(bboxA) +def rerec(bboxA): + """Convert bboxA to square.""" + h = bboxA[:,3]-bboxA[:,1] + w = bboxA[:,2]-bboxA[:,0] + l = np.maximum(w, h) + bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 + bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 + bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) + return bboxA + +def imresample(img, sz): + im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable + return im_data diff --git a/facelib/PoseEstimator.py b/facelib/PoseEstimator.py index 586f3ce..b0011ad 100644 --- a/facelib/PoseEstimator.py +++ b/facelib/PoseEstimator.py @@ -1,313 +1,313 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. -trained on https://www.umdfaces.io -based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION -""" - -class PoseEstimator(object): - VERSION = 1 - def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - self.resolution = resolution - - self.angles = [60, 45, 30, 10, 2] - self.alpha_cat_losses = [7,5,3,1,1] - self.class_nums = [ angle+1 for angle in self.angles ] - self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) ) - self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) ) - self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) ) - - self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) ) - - self.input_bgr_shape = (resolution, resolution, 3) - - def ResamplerFunc(input): - mean_t, logvar_t = input - return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t)) - - self.BVAEResampler = Lambda ( lambda x: x[0] + K.exp(0.5*x[1])*K.random_normal(K.shape(x[0])), - output_shape=K.int_shape(self.encoder.outputs[0])[1:] ) - - inp_t = Input (self.input_bgr_shape) - inp_real_t = Input (self.input_bgr_shape) - inp_pitch_t = Input ( (1,) ) - inp_yaw_t = Input ( (1,) ) - inp_roll_t = Input ( (1,) ) - - - mean_t, logvar_t = self.encoder(inp_t) - - latent_t = self.BVAEResampler([mean_t, logvar_t]) - - if training: - bgr_t = self.decoder (latent_t) - pyrs_t = self.model_l(latent_t) - else: - self.model = Model(inp_t, self.model_l(latent_t) ) - pyrs_t = self.model(inp_t) - - if load_weights: - if training: - self.encoder.load_weights (str(self.encoder_weights_path)) - self.decoder.load_weights (str(self.decoder_weights_path)) - self.model_l.load_weights (str(self.l_weights_path)) - else: - self.model.load_weights (str(self.model_weights_path)) - - else: - def gather_Conv2D_layers(models_list): - conv_weights_list = [] - for model in models_list: - for layer in model.layers: - layer_type = type(layer) - if layer_type == keras.layers.Conv2D: - 
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - elif layer_type == keras.engine.training.Model: - conv_weights_list += gather_Conv2D_layers ([layer]) - return conv_weights_list - - CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) ) - - - if training: - inp_pyrs_t = [] - for class_num in self.class_nums: - inp_pyrs_t += [ Input ((3,)) ] - - pyr_loss = [] - - for i,class_num in enumerate(self.class_nums): - a = self.alpha_cat_losses[i] - pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ] - - def BVAELoss(beta=4): - #keep in mind loss per sample, not per minibatch - def func(input): - mean_t, logvar_t = input - return beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) - return func - - BVAE_loss = BVAELoss(4)([mean_t, logvar_t])#beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) - - - bgr_loss = K.mean(K.square(inp_real_t-bgr_t), axis=0, keepdims=True) - - #train_loss = BVAE_loss + bgr_loss - - pyr_loss = sum(pyr_loss) - - - self.train = K.function ([inp_t, inp_real_t], - [ K.mean (BVAE_loss)+K.mean(bgr_loss) ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( [BVAE_loss, bgr_loss], self.encoder.trainable_weights+self.decoder.trainable_weights ) ) - - self.train_l = K.function ([inp_t] + inp_pyrs_t, - [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) ) - - - self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.encoder.save_weights (str(self.encoder_weights_path)) - self.decoder.save_weights (str(self.decoder_weights_path)) - self.model_l.save_weights (str(self.l_weights_path)) - - inp_t = Input (self.input_bgr_shape) - - Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) - - def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False): - - if not skip_bgr_train: - bgr_loss, = self.train( [warps, imgs] ) - pyr_loss = 0 - else: - bgr_loss = 0 - - feed = [imgs] - for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): - a = angle / 2 - c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx()) - feed += [c] - - pyr_loss, = self.train_l(feed) - - return bgr_loss, pyr_loss - - def extract (self, input_image, is_input_tanh=False): - if is_input_tanh: - raise NotImplemented("is_input_tanh") - - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
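
Two notes on the PoseEstimator code in this hunk. First, BVAEResampler is the usual VAE reparameterization trick: the randomness is isolated in an eps term so the sample stays differentiable with respect to mean and logvar. A plain-numpy sketch of the same expression:

import numpy as np

mean = np.zeros(4)
logvar = np.log(np.full(4, 0.25))        # i.e. variance 0.25, std 0.5
eps = np.random.normal(size=4)
z = mean + np.exp(0.5 * logvar) * eps    # same form as the Lambda above

Second, extract()'s `raise NotImplemented("is_input_tanh")` would itself fail with a TypeError at runtime, since NotImplemented is a constant rather than an exception class; NotImplementedError is presumably what was intended.
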
- - bgr, result, = self.view( [input_image] ) - - - #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 ) - - if input_shape_len == 3: - bgr = bgr[0] - result = result[0] - - return bgr, result - - @staticmethod - def BuildModels ( resolution, class_nums, ae_dims=128): - exec( nnlib.import_all(), locals(), globals() ) - - x = inp = Input ( (resolution,resolution,3) ) - x = PoseEstimator.EncFlow(ae_dims)(x) - encoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.DecFlow(resolution, ae_dims)(x) - decoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.LatentFlow(class_nums=class_nums)(x) - model_l = Model(inp, x ) - - return encoder, decoder, model_l - - @staticmethod - def EncFlow(ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - XConv2D = partial(Conv2D, padding='zero') - - - def downscale (dim, **kwargs): - def func(x): - return ReLU() ( ( XConv2D(dim, kernel_size=4, strides=2)(x)) ) - return func - - - downscale = partial(downscale) - - ed_ch_dims = 128 - - def func(input): - x = input - x = downscale(64)(x) - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = Flatten()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - mean = Dense(ae_dims)(x) - logvar = Dense(ae_dims)(x) - - return mean, logvar - - return func - - @staticmethod - def DecFlow(resolution, ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - XConv2D = partial(Conv2D, padding='zero') - - def upscale (dim, strides=2, **kwargs): - def func(x): - return ReLU()( ( Conv2DTranspose(dim, kernel_size=4, strides=strides, padding='same')(x)) ) - return func - - def to_bgr (output_nc, **kwargs): - def func(x): - return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x) - return func - - upscale = partial(upscale) - lowest_dense_res = resolution // 16 - - def func(input): - x = input - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x) - x = ReLU()(x) - - x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x) - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - x = to_bgr(3)(x) - - return x - return func - - @staticmethod - def LatentFlow(class_nums): - exec( nnlib.import_all(), locals(), globals() ) - - XConv2D = partial(Conv2D, padding='zero') - - def func(latent): - x = latent - - x = Dense(1024, activation='relu')(x) - x = Dropout(0.5)(x) - x = Dense(1024, activation='relu')(x) - # x = Dropout(0.5)(x) - # x = Dense(4096, activation='relu')(x) - - output = [] - for class_num in class_nums: - pyr = Dense(3, activation='tanh')(x) - output += [pyr] - - return output - - #y = Dropout(0.5)(y) - #y = Dense(1024, activation='relu')(y) - return func - - -# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') -# x = resnet50(x) -# output = [] -# for class_num in class_nums: -# pitch = Dense(class_num)(x) -# yaw = Dense(class_num)(x) -# roll = Dense(class_num)(x) -# output += [pitch,yaw,roll] - -# return output +import os +import pickle +from functools import partial +from pathlib import Path + +import cv2 +import numpy as np + +from interact import interact as io +from nnlib import nnlib + +""" +PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. 
+trained on https://www.umdfaces.io +based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION +""" + +class PoseEstimator(object): + VERSION = 1 + def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): + exec( nnlib.import_all(), locals(), globals() ) + self.resolution = resolution + + self.angles = [60, 45, 30, 10, 2] + self.alpha_cat_losses = [7,5,3,1,1] + self.class_nums = [ angle+1 for angle in self.angles ] + self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums) + + if weights_file_root is not None: + weights_file_root = Path(weights_file_root) + else: + weights_file_root = Path(__file__).parent + + self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) ) + self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) ) + self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) ) + + self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) ) + + self.input_bgr_shape = (resolution, resolution, 3) + + def ResamplerFunc(input): + mean_t, logvar_t = input + return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t)) + + self.BVAEResampler = Lambda ( lambda x: x[0] + K.exp(0.5*x[1])*K.random_normal(K.shape(x[0])), + output_shape=K.int_shape(self.encoder.outputs[0])[1:] ) + + inp_t = Input (self.input_bgr_shape) + inp_real_t = Input (self.input_bgr_shape) + inp_pitch_t = Input ( (1,) ) + inp_yaw_t = Input ( (1,) ) + inp_roll_t = Input ( (1,) ) + + + mean_t, logvar_t = self.encoder(inp_t) + + latent_t = self.BVAEResampler([mean_t, logvar_t]) + + if training: + bgr_t = self.decoder (latent_t) + pyrs_t = self.model_l(latent_t) + else: + self.model = Model(inp_t, self.model_l(latent_t) ) + pyrs_t = self.model(inp_t) + + if load_weights: + if training: + self.encoder.load_weights (str(self.encoder_weights_path)) + self.decoder.load_weights (str(self.decoder_weights_path)) + self.model_l.load_weights (str(self.l_weights_path)) + else: + self.model.load_weights (str(self.model_weights_path)) + + else: + def gather_Conv2D_layers(models_list): + conv_weights_list = [] + for model in models_list: + for layer in model.layers: + layer_type = type(layer) + if layer_type == keras.layers.Conv2D: + conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights + elif layer_type == keras.engine.training.Model: + conv_weights_list += gather_Conv2D_layers ([layer]) + return conv_weights_list + + CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) ) + + + if training: + inp_pyrs_t = [] + for class_num in self.class_nums: + inp_pyrs_t += [ Input ((3,)) ] + + pyr_loss = [] + + for i,class_num in enumerate(self.class_nums): + a = self.alpha_cat_losses[i] + pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ] + + def BVAELoss(beta=4): + #keep in mind loss per sample, not per minibatch + def func(input): + mean_t, logvar_t = input + return beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) + return func + + BVAE_loss = BVAELoss(4)([mean_t, logvar_t])#beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True ) + + + bgr_loss = K.mean(K.square(inp_real_t-bgr_t), axis=0, keepdims=True) + + #train_loss = BVAE_loss + bgr_loss + + pyr_loss = 
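
BVAELoss above is the standard Gaussian KL divergence KL(N(mean, exp(logvar)) || N(0, I)) scaled by beta (4 here); a numpy transcription for reference:

import numpy as np

def kl_per_batch(mean, logvar, beta=4.0):
    # -0.5 * sum(1 + logvar - exp(logvar) - mean^2), summed over latent dims
    return beta * np.mean(np.sum(-0.5 * (1 + logvar - np.exp(logvar) - mean ** 2), axis=1))

print(kl_per_batch(np.zeros((2, 8)), np.zeros((2, 8))))   # 0.0 exactly at the prior
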
sum(pyr_loss) + + + self.train = K.function ([inp_t, inp_real_t], + [ K.mean (BVAE_loss)+K.mean(bgr_loss) ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( [BVAE_loss, bgr_loss], self.encoder.trainable_weights+self.decoder.trainable_weights ) ) + + self.train_l = K.function ([inp_t] + inp_pyrs_t, + [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) ) + + + self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] ) + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def save_weights(self): + self.encoder.save_weights (str(self.encoder_weights_path)) + self.decoder.save_weights (str(self.decoder_weights_path)) + self.model_l.save_weights (str(self.l_weights_path)) + + inp_t = Input (self.input_bgr_shape) + + Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) + + def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False): + + if not skip_bgr_train: + bgr_loss, = self.train( [warps, imgs] ) + pyr_loss = 0 + else: + bgr_loss = 0 + + feed = [imgs] + for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): + a = angle / 2 + c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx()) + feed += [c] + + pyr_loss, = self.train_l(feed) + + return bgr_loss, pyr_loss + + def extract (self, input_image, is_input_tanh=False): + if is_input_tanh: + raise NotImplemented("is_input_tanh") + + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] + + bgr, result, = self.view( [input_image] ) + + + #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 ) + + if input_shape_len == 3: + bgr = bgr[0] + result = result[0] + + return bgr, result + + @staticmethod + def BuildModels ( resolution, class_nums, ae_dims=128): + exec( nnlib.import_all(), locals(), globals() ) + + x = inp = Input ( (resolution,resolution,3) ) + x = PoseEstimator.EncFlow(ae_dims)(x) + encoder = Model(inp,x) + + x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) + x = PoseEstimator.DecFlow(resolution, ae_dims)(x) + decoder = Model(inp,x) + + x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) + x = PoseEstimator.LatentFlow(class_nums=class_nums)(x) + model_l = Model(inp, x ) + + return encoder, decoder, model_l + + @staticmethod + def EncFlow(ae_dims): + exec( nnlib.import_all(), locals(), globals() ) + + XConv2D = partial(Conv2D, padding='zero') + + + def downscale (dim, **kwargs): + def func(x): + return ReLU() ( ( XConv2D(dim, kernel_size=4, strides=2)(x)) ) + return func + + + downscale = partial(downscale) + + ed_ch_dims = 128 + + def func(input): + x = input + x = downscale(64)(x) + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = Flatten()(x) + + x = Dense(256)(x) + x = ReLU()(x) + + x = Dense(256)(x) + x = ReLU()(x) + + mean = Dense(ae_dims)(x) + logvar = Dense(ae_dims)(x) + + return mean, logvar + + return func + + @staticmethod + def DecFlow(resolution, ae_dims): + exec( nnlib.import_all(), locals(), globals() ) + + XConv2D = partial(Conv2D, padding='zero') + + def upscale (dim, strides=2, **kwargs): + def func(x): + return ReLU()( ( Conv2DTranspose(dim, kernel_size=4, strides=strides, padding='same')(x)) ) + return func + + def to_bgr (output_nc, **kwargs): + def func(x): + return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x) + return func + + upscale = 
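
The rounding in train_on_batch is the coarse-fine discretization from the referenced paper: a tanh-encoded angle is snapped to angle+1 levels per granularity, which is why class_nums is built as angle+1. A made-up value traced through all five granularities:

import numpy as np

pyr_tanh = np.array([0.123])
for angle in [60, 45, 30, 10, 2]:
    a = angle / 2
    c = np.round((pyr_tanh + 1) * a) / a - 1   # one of 2*a + 1 = angle + 1 levels
    print(angle, c)
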
partial(upscale) + lowest_dense_res = resolution // 16 + + def func(input): + x = input + + x = Dense(256)(x) + x = ReLU()(x) + + x = Dense(256)(x) + x = ReLU()(x) + + x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x) + x = ReLU()(x) + + x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x) + + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + x = upscale(64)(x) + x = to_bgr(3)(x) + + return x + return func + + @staticmethod + def LatentFlow(class_nums): + exec( nnlib.import_all(), locals(), globals() ) + + XConv2D = partial(Conv2D, padding='zero') + + def func(latent): + x = latent + + x = Dense(1024, activation='relu')(x) + x = Dropout(0.5)(x) + x = Dense(1024, activation='relu')(x) + # x = Dropout(0.5)(x) + # x = Dense(4096, activation='relu')(x) + + output = [] + for class_num in class_nums: + pyr = Dense(3, activation='tanh')(x) + output += [pyr] + + return output + + #y = Dropout(0.5)(y) + #y = Dense(1024, activation='relu')(y) + return func + + +# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') +# x = resnet50(x) +# output = [] +# for class_num in class_nums: +# pitch = Dense(class_num)(x) +# yaw = Dense(class_num)(x) +# roll = Dense(class_num)(x) +# output += [pitch,yaw,roll] + +# return output diff --git a/facelib/S3FDExtractor.py b/facelib/S3FDExtractor.py index 3487ac6..59e42f1 100644 --- a/facelib/S3FDExtractor.py +++ b/facelib/S3FDExtractor.py @@ -1,98 +1,98 @@ -import numpy as np -from pathlib import Path -import cv2 -from nnlib import nnlib - -class S3FDExtractor(object): - def __init__(self): - exec( nnlib.import_all(), locals(), globals() ) - - model_path = Path(__file__).parent / "S3FD.h5" - if not model_path.exists(): - return None - - self.model = nnlib.keras.models.load_model ( str(model_path) ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, is_bgr=True): - - if is_bgr: - input_image = input_image[:,:,::-1] - is_bgr = False - - (h, w, ch) = input_image.shape - - d = max(w, h) - scale_to = 640 if d >= 1280 else d / 2 - scale_to = max(64, scale_to) - - input_scale = d / scale_to - input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR) - - olist = self.model.predict( np.expand_dims(input_image,0) ) - - detected_faces = [] - for ltrb in self.refine (olist): - l,t,r,b = [ x*input_scale for x in ltrb] - bt = b-t - if min(r-l,bt) < 40: #filtering faces < 40pix by any side - continue - b += bt*0.1 #enlarging bottom line a bit for 2DFAN-4, because default is not enough covering a chin - detected_faces.append ( [int(x) for x in (l,t,r,b) ] ) - - return detected_faces - - def refine(self, olist): - bboxlist = [] - for i, ((ocls,), (oreg,)) in enumerate ( zip ( olist[::2], olist[1::2] ) ): - stride = 2**(i + 2) # 4,8,16,32,64,128 - s_d2 = stride / 2 - s_m4 = stride * 4 - - for hindex, windex in zip(*np.where(ocls > 0.05)): - score = ocls[hindex, windex] - loc = oreg[hindex, windex, :] - priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4]) - priors_2p = priors[2:] - box = np.concatenate((priors[:2] + loc[:2] * 0.1 * priors_2p, - priors_2p * np.exp(loc[2:] * 0.2)) ) - box[:2] -= box[2:] / 2 - box[2:] += box[:2] - - bboxlist.append([*box, score]) - - bboxlist = np.array(bboxlist) - if len(bboxlist) == 0: - bboxlist = 
np.zeros((1, 5)) - - bboxlist = bboxlist[self.refine_nms(bboxlist, 0.3), :] - bboxlist = [ x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5] - return bboxlist - - def refine_nms(self, dets, thresh): - keep = list() - if len(dets) == 0: - return keep - - x_1, y_1, x_2, y_2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] - areas = (x_2 - x_1 + 1) * (y_2 - y_1 + 1) - order = scores.argsort()[::-1] - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - xx_1, yy_1 = np.maximum(x_1[i], x_1[order[1:]]), np.maximum(y_1[i], y_1[order[1:]]) - xx_2, yy_2 = np.minimum(x_2[i], x_2[order[1:]]), np.minimum(y_2[i], y_2[order[1:]]) - - width, height = np.maximum(0.0, xx_2 - xx_1 + 1), np.maximum(0.0, yy_2 - yy_1 + 1) - ovr = width * height / (areas[i] + areas[order[1:]] - width * height) - - inds = np.where(ovr <= thresh)[0] - order = order[inds + 1] - return keep +import numpy as np +from pathlib import Path +import cv2 +from nnlib import nnlib + +class S3FDExtractor(object): + def __init__(self): + exec( nnlib.import_all(), locals(), globals() ) + + model_path = Path(__file__).parent / "S3FD.h5" + if not model_path.exists(): + return None + + self.model = nnlib.keras.models.load_model ( str(model_path) ) + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def extract (self, input_image, is_bgr=True): + + if is_bgr: + input_image = input_image[:,:,::-1] + is_bgr = False + + (h, w, ch) = input_image.shape + + d = max(w, h) + scale_to = 640 if d >= 1280 else d / 2 + scale_to = max(64, scale_to) + + input_scale = d / scale_to + input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR) + + olist = self.model.predict( np.expand_dims(input_image,0) ) + + detected_faces = [] + for ltrb in self.refine (olist): + l,t,r,b = [ x*input_scale for x in ltrb] + bt = b-t + if min(r-l,bt) < 40: #filtering faces < 40pix by any side + continue + b += bt*0.1 #enlarging bottom line a bit for 2DFAN-4, because default is not enough covering a chin + detected_faces.append ( [int(x) for x in (l,t,r,b) ] ) + + return detected_faces + + def refine(self, olist): + bboxlist = [] + for i, ((ocls,), (oreg,)) in enumerate ( zip ( olist[::2], olist[1::2] ) ): + stride = 2**(i + 2) # 4,8,16,32,64,128 + s_d2 = stride / 2 + s_m4 = stride * 4 + + for hindex, windex in zip(*np.where(ocls > 0.05)): + score = ocls[hindex, windex] + loc = oreg[hindex, windex, :] + priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4]) + priors_2p = priors[2:] + box = np.concatenate((priors[:2] + loc[:2] * 0.1 * priors_2p, + priors_2p * np.exp(loc[2:] * 0.2)) ) + box[:2] -= box[2:] / 2 + box[2:] += box[:2] + + bboxlist.append([*box, score]) + + bboxlist = np.array(bboxlist) + if len(bboxlist) == 0: + bboxlist = np.zeros((1, 5)) + + bboxlist = bboxlist[self.refine_nms(bboxlist, 0.3), :] + bboxlist = [ x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5] + return bboxlist + + def refine_nms(self, dets, thresh): + keep = list() + if len(dets) == 0: + return keep + + x_1, y_1, x_2, y_2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4] + areas = (x_2 - x_1 + 1) * (y_2 - y_1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx_1, yy_1 = np.maximum(x_1[i], x_1[order[1:]]), np.maximum(y_1[i], y_1[order[1:]]) + xx_2, yy_2 = 
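
The box decoding inside refine() above follows the usual SSD-style prior-box scheme; a standalone sketch with made-up regression outputs (the 0.1 / 0.2 factors match the variance constants in the code):

import numpy as np

stride = 8                                       # feature level i=1 -> 2**(1+2)
hindex, windex = 5, 9
prior = np.array([windex * stride + stride / 2,  # prior centre x
                  hindex * stride + stride / 2,  # prior centre y
                  stride * 4, stride * 4])       # prior side = 4 * stride
loc = np.array([0.1, -0.2, 0.05, 0.05])          # hypothetical network output
cxcy = prior[:2] + loc[:2] * 0.1 * prior[2:]     # regressed centre
wh = prior[2:] * np.exp(loc[2:] * 0.2)           # regressed width/height
ltrb = np.concatenate([cxcy - wh / 2, cxcy - wh / 2 + wh])
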
np.minimum(x_2[i], x_2[order[1:]]), np.minimum(y_2[i], y_2[order[1:]]) + + width, height = np.maximum(0.0, xx_2 - xx_1 + 1), np.maximum(0.0, yy_2 - yy_1 + 1) + ovr = width * height / (areas[i] + areas[order[1:]] - width * height) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + return keep diff --git a/facelib/__init__.py b/facelib/__init__.py index 89528ab..1e89d43 100644 --- a/facelib/__init__.py +++ b/facelib/__init__.py @@ -1,7 +1,7 @@ -from .FaceType import FaceType -from .DLIBExtractor import DLIBExtractor -from .MTCExtractor import MTCExtractor -from .S3FDExtractor import S3FDExtractor -from .LandmarksExtractor import LandmarksExtractor -from .FANSegmentator import FANSegmentator +from .FaceType import FaceType +from .DLIBExtractor import DLIBExtractor +from .MTCExtractor import MTCExtractor +from .S3FDExtractor import S3FDExtractor +from .LandmarksExtractor import LandmarksExtractor +from .FANSegmentator import FANSegmentator from .PoseEstimator import PoseEstimator \ No newline at end of file diff --git a/imagelib/DCSCN.py b/imagelib/DCSCN.py index 8978211..332e70a 100644 --- a/imagelib/DCSCN.py +++ b/imagelib/DCSCN.py @@ -1,164 +1,164 @@ -import numpy as np -import cv2 -from pathlib import Path -from nnlib import nnlib -from interact import interact as io - -class DCSCN(): - def __init__(self): - exec( nnlib.import_all(), locals(), globals() ) - - inp_x = KL.Input([None, None, 1]) - inp_x2 = KL.Input([None, None, 1]) - - x = inp_x - layers_count = 12 - layers = [] - for i in range(1,layers_count+1): - if i == 1: - output_feature_num = 196 - else: - x1 = (i-1) / float(layers_count - 1) - y1 = x1 ** (1.0 / 1.5) - output_feature_num = int((196 - 48) * (1 - y1) + 48) - x = Conv2D(output_feature_num, kernel_size=3, strides=1, padding='same', name='CNN%d' % (i) ) (x) - x = PReLU(shared_axes=[1,2], name='CNN%d_prelu' % (i) ) (x) - layers.append(x) - - x_concat = KL.Concatenate()(layers) - - A1 = Conv2D(64, kernel_size=1, strides=1, padding='same', name='A1' ) (x_concat) - A1 = PReLU(shared_axes=[1,2], name='A1_prelu') (A1) - - B1 = Conv2D(32, kernel_size=1, strides=1, padding='same', name='B1' ) (x_concat) - B1 = PReLU(shared_axes=[1,2], name='B1_prelu') (B1) - - B2 = Conv2D(32, kernel_size=3, strides=1, padding='same', name='B2' ) (B1) - B2 = PReLU(shared_axes=[1,2], name='B2_prelu') (B2) - - x = KL.Concatenate()([B2,A1]) - x = Conv2D(96*4, kernel_size=3, strides=1, padding='same', name='Up_PS' )(x) - x = PixelShuffler()(x) - x = Conv2D(1, kernel_size=3, strides=1, padding='same', name='R_CNN1', use_bias=False )(x) - x = KL.Add()([x, inp_x2]) - self.model = keras.models.Model ([inp_x, inp_x2], [x]) - self.model.load_weights ( Path(__file__).parent / 'DCSCN.h5' ) - - def upscale(self, img, is_bgr=True, is_float=True): - if is_bgr: - img = img[...,::-1] - - if is_float: - img = np.clip (img*255, 0, 255) - - img_shape_len = len(img.shape) - h, w = img.shape[:2] - ch = img.shape[2] if len(img.shape) >= 3 else 1 - - nh, nw = h*2, w*2 - - img_x = self.convert_rgb_to_y(img) - - img_bx = cv2.resize(img_x, (nh, nw), cv2.INTER_CUBIC) - - ensemble = 8 - - output = np.zeros([nh,nw,1], dtype=np.float32) - - for i in range(ensemble): - x = np.reshape( self.flip(img_x, i), (1,h,w,1) ) - bx = np.reshape( self.flip(img_bx, i), (1,nh,nw,1) ) - y = self.model.predict([x,bx])[0] - y = self.flip(y, i, invert=True) - output += y - - output /= ensemble - - bimg = cv2.resize(img, (nh, nw), cv2.INTER_CUBIC) - bimg_ycbcr = self.convert_rgb_to_ycbcr(bimg) - - if ch > 1: - output = 
self.convert_y_and_cbcr_to_rgb(output, bimg_ycbcr[:, :, 1:3]) - - if is_float: - output = np.clip (output/255.0, 0, 1.0) - - if is_bgr: - output = output[...,::-1] - - return output - - def convert_rgb_to_y(self, image): - if len(image.shape) <= 2 or image.shape[2] == 1: - return image - - xform = np.array([[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0]], dtype=np.float32) - y_image = image.dot(xform.T) + 16.0 - - return y_image - - - def convert_rgb_to_ycbcr(self, image): - if len(image.shape) <= 2 or image.shape[2] == 1: - return image - - xform = np.array( - [[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0], - [- 37.945 / 256.0, - 74.494 / 256.0, 112.439 / 256.0], - [112.439 / 256.0, - 94.154 / 256.0, - 18.285 / 256.0]], dtype=np.float32) - - ycbcr_image = image.dot(xform.T) - ycbcr_image[:, :, 0] += 16.0 - ycbcr_image[:, :, [1, 2]] += 128.0 - - return ycbcr_image - - def convert_ycbcr_to_rgb(self,ycbcr_image): - rgb_image = np.zeros([ycbcr_image.shape[0], ycbcr_image.shape[1], 3], dtype=np.float32) - - rgb_image[:, :, 0] = ycbcr_image[:, :, 0] - 16.0 - rgb_image[:, :, [1, 2]] = ycbcr_image[:, :, [1, 2]] - 128.0 - xform = np.array( - [[298.082 / 256.0, 0, 408.583 / 256.0], - [298.082 / 256.0, -100.291 / 256.0, -208.120 / 256.0], - [298.082 / 256.0, 516.412 / 256.0, 0]], dtype=np.float32) - rgb_image = rgb_image.dot(xform.T) - - return rgb_image - - def convert_y_and_cbcr_to_rgb(self,y_image, cbcr_image): - if len(y_image.shape) <= 2: - y_image = y_image.reshape[y_image.shape[0], y_image.shape[1], 1] - - if len(y_image.shape) == 3 and y_image.shape[2] == 3: - y_image = y_image[:, :, 0:1] - - ycbcr_image = np.zeros([y_image.shape[0], y_image.shape[1], 3], dtype=np.float32) - ycbcr_image[:, :, 0] = y_image[:, :, 0] - ycbcr_image[:, :, 1:3] = cbcr_image[:, :, 0:2] - - return self.convert_ycbcr_to_rgb(ycbcr_image) - - def flip(self, image, flip_type, invert=False): - if flip_type == 0: - return image - elif flip_type == 1: - return np.flipud(image) - elif flip_type == 2: - return np.fliplr(image) - elif flip_type == 3: - return np.flipud(np.fliplr(image)) - elif flip_type == 4: - return np.rot90(image, 1 if invert is False else -1) - elif flip_type == 5: - return np.rot90(image, -1 if invert is False else 1) - elif flip_type == 6: - if invert is False: - return np.flipud(np.rot90(image)) - else: - return np.rot90(np.flipud(image), -1) - elif flip_type == 7: - if invert is False: - return np.flipud(np.rot90(image, -1)) - else: - return np.rot90(np.flipud(image), 1) +import numpy as np +import cv2 +from pathlib import Path +from nnlib import nnlib +from interact import interact as io + +class DCSCN(): + def __init__(self): + exec( nnlib.import_all(), locals(), globals() ) + + inp_x = KL.Input([None, None, 1]) + inp_x2 = KL.Input([None, None, 1]) + + x = inp_x + layers_count = 12 + layers = [] + for i in range(1,layers_count+1): + if i == 1: + output_feature_num = 196 + else: + x1 = (i-1) / float(layers_count - 1) + y1 = x1 ** (1.0 / 1.5) + output_feature_num = int((196 - 48) * (1 - y1) + 48) + x = Conv2D(output_feature_num, kernel_size=3, strides=1, padding='same', name='CNN%d' % (i) ) (x) + x = PReLU(shared_axes=[1,2], name='CNN%d_prelu' % (i) ) (x) + layers.append(x) + + x_concat = KL.Concatenate()(layers) + + A1 = Conv2D(64, kernel_size=1, strides=1, padding='same', name='A1' ) (x_concat) + A1 = PReLU(shared_axes=[1,2], name='A1_prelu') (A1) + + B1 = Conv2D(32, kernel_size=1, strides=1, padding='same', name='B1' ) (x_concat) + B1 = PReLU(shared_axes=[1,2], name='B1_prelu') (B1) + + B2 = 
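
DCSCN super-resolves only the luma channel and upscales chroma bicubically; convert_rgb_to_y above uses what appear to be the 8-bit studio-swing BT.601 coefficients, under which reference white maps to Y = 235:

import numpy as np

xform = np.array([65.738, 129.057, 25.064]) / 256.0
white = np.array([255.0, 255.0, 255.0])   # white pixel in the 0..255 range
y = white @ xform + 16.0                  # -> 235.0, the nominal studio white level
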
Conv2D(32, kernel_size=3, strides=1, padding='same', name='B2' ) (B1) + B2 = PReLU(shared_axes=[1,2], name='B2_prelu') (B2) + + x = KL.Concatenate()([B2,A1]) + x = Conv2D(96*4, kernel_size=3, strides=1, padding='same', name='Up_PS' )(x) + x = PixelShuffler()(x) + x = Conv2D(1, kernel_size=3, strides=1, padding='same', name='R_CNN1', use_bias=False )(x) + x = KL.Add()([x, inp_x2]) + self.model = keras.models.Model ([inp_x, inp_x2], [x]) + self.model.load_weights ( Path(__file__).parent / 'DCSCN.h5' ) + + def upscale(self, img, is_bgr=True, is_float=True): + if is_bgr: + img = img[...,::-1] + + if is_float: + img = np.clip (img*255, 0, 255) + + img_shape_len = len(img.shape) + h, w = img.shape[:2] + ch = img.shape[2] if len(img.shape) >= 3 else 1 + + nh, nw = h*2, w*2 + + img_x = self.convert_rgb_to_y(img) + + img_bx = cv2.resize(img_x, (nh, nw), cv2.INTER_CUBIC) + + ensemble = 8 + + output = np.zeros([nh,nw,1], dtype=np.float32) + + for i in range(ensemble): + x = np.reshape( self.flip(img_x, i), (1,h,w,1) ) + bx = np.reshape( self.flip(img_bx, i), (1,nh,nw,1) ) + y = self.model.predict([x,bx])[0] + y = self.flip(y, i, invert=True) + output += y + + output /= ensemble + + bimg = cv2.resize(img, (nh, nw), cv2.INTER_CUBIC) + bimg_ycbcr = self.convert_rgb_to_ycbcr(bimg) + + if ch > 1: + output = self.convert_y_and_cbcr_to_rgb(output, bimg_ycbcr[:, :, 1:3]) + + if is_float: + output = np.clip (output/255.0, 0, 1.0) + + if is_bgr: + output = output[...,::-1] + + return output + + def convert_rgb_to_y(self, image): + if len(image.shape) <= 2 or image.shape[2] == 1: + return image + + xform = np.array([[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0]], dtype=np.float32) + y_image = image.dot(xform.T) + 16.0 + + return y_image + + + def convert_rgb_to_ycbcr(self, image): + if len(image.shape) <= 2 or image.shape[2] == 1: + return image + + xform = np.array( + [[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0], + [- 37.945 / 256.0, - 74.494 / 256.0, 112.439 / 256.0], + [112.439 / 256.0, - 94.154 / 256.0, - 18.285 / 256.0]], dtype=np.float32) + + ycbcr_image = image.dot(xform.T) + ycbcr_image[:, :, 0] += 16.0 + ycbcr_image[:, :, [1, 2]] += 128.0 + + return ycbcr_image + + def convert_ycbcr_to_rgb(self,ycbcr_image): + rgb_image = np.zeros([ycbcr_image.shape[0], ycbcr_image.shape[1], 3], dtype=np.float32) + + rgb_image[:, :, 0] = ycbcr_image[:, :, 0] - 16.0 + rgb_image[:, :, [1, 2]] = ycbcr_image[:, :, [1, 2]] - 128.0 + xform = np.array( + [[298.082 / 256.0, 0, 408.583 / 256.0], + [298.082 / 256.0, -100.291 / 256.0, -208.120 / 256.0], + [298.082 / 256.0, 516.412 / 256.0, 0]], dtype=np.float32) + rgb_image = rgb_image.dot(xform.T) + + return rgb_image + + def convert_y_and_cbcr_to_rgb(self,y_image, cbcr_image): + if len(y_image.shape) <= 2: + y_image = y_image.reshape[y_image.shape[0], y_image.shape[1], 1] + + if len(y_image.shape) == 3 and y_image.shape[2] == 3: + y_image = y_image[:, :, 0:1] + + ycbcr_image = np.zeros([y_image.shape[0], y_image.shape[1], 3], dtype=np.float32) + ycbcr_image[:, :, 0] = y_image[:, :, 0] + ycbcr_image[:, :, 1:3] = cbcr_image[:, :, 0:2] + + return self.convert_ycbcr_to_rgb(ycbcr_image) + + def flip(self, image, flip_type, invert=False): + if flip_type == 0: + return image + elif flip_type == 1: + return np.flipud(image) + elif flip_type == 2: + return np.fliplr(image) + elif flip_type == 3: + return np.flipud(np.fliplr(image)) + elif flip_type == 4: + return np.rot90(image, 1 if invert is False else -1) + elif flip_type == 5: + return np.rot90(image, -1 if invert is False 
else 1) + elif flip_type == 6: + if invert is False: + return np.flipud(np.rot90(image)) + else: + return np.rot90(np.flipud(image), -1) + elif flip_type == 7: + if invert is False: + return np.flipud(np.rot90(image, -1)) + else: + return np.rot90(np.flipud(image), 1) diff --git a/imagelib/IEPolys.py b/imagelib/IEPolys.py index daeef28..820a229 100644 --- a/imagelib/IEPolys.py +++ b/imagelib/IEPolys.py @@ -1,104 +1,104 @@ -import numpy as np -import cv2 - -class IEPolysPoints: - def __init__(self, IEPolys_parent, type): - self.parent = IEPolys_parent - self.type = type - self.points = np.empty( (0,2), dtype=np.int32 ) - self.n_max = self.n = 0 - - def add(self,x,y): - self.points = np.append(self.points[0:self.n], [ (x,y) ], axis=0) - self.n_max = self.n = self.n + 1 - self.parent.dirty = True - - def n_dec(self): - self.n = max(0, self.n-1) - self.parent.dirty = True - return self.n - - def n_inc(self): - self.n = min(len(self.points), self.n+1) - self.parent.dirty = True - return self.n - - def n_clip(self): - self.points = self.points[0:self.n] - self.n_max = self.n - - def cur_point(self): - return self.points[self.n-1] - - def points_to_n(self): - return self.points[0:self.n] - - def set_points(self, points): - self.points = np.array(points) - self.n_max = self.n = len(points) - self.parent.dirty = True - -class IEPolys: - def __init__(self): - self.list = [] - self.n_max = self.n = 0 - self.dirty = True - - def add(self, type): - self.list = self.list[0:self.n] - self.list.append ( IEPolysPoints(self, type) ) - self.n_max = self.n = self.n + 1 - self.dirty = True - - def n_dec(self): - self.n = max(0, self.n-1) - self.dirty = True - return self.n - - def n_inc(self): - self.n = min(len(self.list), self.n+1) - self.dirty = True - return self.n - - def n_list(self): - return self.list[self.n-1] - - def n_clip(self): - self.list = self.list[0:self.n] - self.n_max = self.n - if self.n > 0: - self.list[-1].n_clip() - - def __iter__(self): - for n in range(self.n): - yield self.list[n] - - def switch_dirty(self): - d = self.dirty - self.dirty = False - return d - - def overlay_mask(self, mask): - h,w,c = mask.shape - white = (1,)*c - black = (0,)*c - for n in range(self.n): - poly = self.list[n] - if poly.n > 0: - cv2.fillPoly(mask, [poly.points_to_n()], white if poly.type == 1 else black ) - - def dump(self): - result = [] - for n in range(self.n): - l = self.list[n] - result += [ (l.type, l.points_to_n().tolist() ) ] - return result - - @staticmethod - def load(ie_polys=None): - obj = IEPolys() - if ie_polys is not None: - for (type, points) in ie_polys: - obj.add(type) - obj.n_list().set_points(points) +import numpy as np +import cv2 + +class IEPolysPoints: + def __init__(self, IEPolys_parent, type): + self.parent = IEPolys_parent + self.type = type + self.points = np.empty( (0,2), dtype=np.int32 ) + self.n_max = self.n = 0 + + def add(self,x,y): + self.points = np.append(self.points[0:self.n], [ (x,y) ], axis=0) + self.n_max = self.n = self.n + 1 + self.parent.dirty = True + + def n_dec(self): + self.n = max(0, self.n-1) + self.parent.dirty = True + return self.n + + def n_inc(self): + self.n = min(len(self.points), self.n+1) + self.parent.dirty = True + return self.n + + def n_clip(self): + self.points = self.points[0:self.n] + self.n_max = self.n + + def cur_point(self): + return self.points[self.n-1] + + def points_to_n(self): + return self.points[0:self.n] + + def set_points(self, points): + self.points = np.array(points) + self.n_max = self.n = len(points) + self.parent.dirty = 
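
The eight flip variants used by upscale() are the symmetries of a square (identity, two mirrors, their composition, and four rotation/mirror combinations); each must invert cleanly so the ensemble average can be accumulated in the original orientation. A quick self-inverse check for one of them:

import numpy as np

img = np.arange(12).reshape(3, 4)

def fwd(a):
    return np.flipud(np.rot90(a))        # flip_type 6, forward

def inv(a):
    return np.rot90(np.flipud(a), -1)    # flip_type 6, invert=True

assert np.array_equal(inv(fwd(img)), img)
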
True + +class IEPolys: + def __init__(self): + self.list = [] + self.n_max = self.n = 0 + self.dirty = True + + def add(self, type): + self.list = self.list[0:self.n] + self.list.append ( IEPolysPoints(self, type) ) + self.n_max = self.n = self.n + 1 + self.dirty = True + + def n_dec(self): + self.n = max(0, self.n-1) + self.dirty = True + return self.n + + def n_inc(self): + self.n = min(len(self.list), self.n+1) + self.dirty = True + return self.n + + def n_list(self): + return self.list[self.n-1] + + def n_clip(self): + self.list = self.list[0:self.n] + self.n_max = self.n + if self.n > 0: + self.list[-1].n_clip() + + def __iter__(self): + for n in range(self.n): + yield self.list[n] + + def switch_dirty(self): + d = self.dirty + self.dirty = False + return d + + def overlay_mask(self, mask): + h,w,c = mask.shape + white = (1,)*c + black = (0,)*c + for n in range(self.n): + poly = self.list[n] + if poly.n > 0: + cv2.fillPoly(mask, [poly.points_to_n()], white if poly.type == 1 else black ) + + def dump(self): + result = [] + for n in range(self.n): + l = self.list[n] + result += [ (l.type, l.points_to_n().tolist() ) ] + return result + + @staticmethod + def load(ie_polys=None): + obj = IEPolys() + if ie_polys is not None: + for (type, points) in ie_polys: + obj.add(type) + obj.n_list().set_points(points) return obj \ No newline at end of file diff --git a/imagelib/__init__.py b/imagelib/__init__.py index 3436f56..14ed304 100644 --- a/imagelib/__init__.py +++ b/imagelib/__init__.py @@ -1,27 +1,27 @@ -from .estimate_sharpness import estimate_sharpness -from .equalize_and_stack_square import equalize_and_stack_square - -from .text import get_text_image -from .text import get_draw_text_lines - -from .draw import draw_polygon -from .draw import draw_rect - -from .morph import morph_by_points - -from .warp import gen_warp_params -from .warp import warp_by_params - -from .reduce_colors import reduce_colors - -from .color_transfer import color_hist_match -from .color_transfer import reinhard_color_transfer -from .color_transfer import linear_color_transfer - -from .DCSCN import DCSCN - -from .common import normalize_channels - -from .IEPolys import IEPolys - +from .estimate_sharpness import estimate_sharpness +from .equalize_and_stack_square import equalize_and_stack_square + +from .text import get_text_image +from .text import get_draw_text_lines + +from .draw import draw_polygon +from .draw import draw_rect + +from .morph import morph_by_points + +from .warp import gen_warp_params +from .warp import warp_by_params + +from .reduce_colors import reduce_colors + +from .color_transfer import color_hist_match +from .color_transfer import reinhard_color_transfer +from .color_transfer import linear_color_transfer + +from .DCSCN import DCSCN + +from .common import normalize_channels + +from .IEPolys import IEPolys + from .blur import LinearMotionBlur \ No newline at end of file diff --git a/imagelib/blur.py b/imagelib/blur.py index 54c7199..e12ccfd 100644 --- a/imagelib/blur.py +++ b/imagelib/blur.py @@ -1,143 +1,143 @@ -import math -import numpy as np -from PIL import Image -from scipy.signal import convolve2d -from skimage.draw import line - -class LineDictionary: - def __init__(self): - self.lines = {} - self.Create3x3Lines() - self.Create5x5Lines() - self.Create7x7Lines() - self.Create9x9Lines() - return - - def Create3x3Lines(self): - lines = {} - lines[0] = [1,0,1,2] - lines[45] = [2,0,0,2] - lines[90] = [0,1,2,1] - lines[135] = [0,0,2,2] - self.lines[3] = lines - return - - def 
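
IEPolys is a small undo/redo-aware container of typed polygons: `n` tracks the live prefix of `list` (and, per polygon, of its points), `n_dec`/`n_inc` move that cursor, and `overlay_mask` paints type-1 polygons white and type-0 polygons black over a mask. A brief usage sketch of the round-trip, assuming a single-channel float mask:

import numpy as np
from imagelib import IEPolys

mask = np.zeros((256, 256, 1), dtype=np.float32)

polys = IEPolys()
polys.add(1)                                   # type 1: include region
for x, y in [(60, 60), (200, 60), (200, 200), (60, 200)]:
    polys.n_list().add(x, y)
polys.add(0)                                   # type 0: exclude region
for x, y in [(100, 100), (160, 100), (160, 160), (100, 160)]:
    polys.n_list().add(x, y)

polys.overlay_mask(mask)                       # include -> 1, exclude -> 0

polys.n_dec()                                  # "undo" the exclude polygon
restored = IEPolys.load(polys.dump())          # dump()/load() round-trip

Because `dump()` only serializes the first `n` polygons, the undone exclude polygon above is dropped from the round-trip, which matches the cursor semantics of the editor.
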
Create5x5Lines(self): - lines = {} - lines[0] = [2,0,2,4] - lines[22.5] = [3,0,1,4] - lines[45] = [0,4,4,0] - lines[67.5] = [0,3,4,1] - lines[90] = [0,2,4,2] - lines[112.5] = [0,1,4,3] - lines[135] = [0,0,4,4] - lines[157.5]= [1,0,3,4] - self.lines[5] = lines - return - - def Create7x7Lines(self): - lines = {} - lines[0] = [3,0,3,6] - lines[15] = [4,0,2,6] - lines[30] = [5,0,1,6] - lines[45] = [6,0,0,6] - lines[60] = [6,1,0,5] - lines[75] = [6,2,0,4] - lines[90] = [0,3,6,3] - lines[105] = [0,2,6,4] - lines[120] = [0,1,6,5] - lines[135] = [0,0,6,6] - lines[150] = [1,0,5,6] - lines[165] = [2,0,4,6] - self.lines[7] = lines - return - - def Create9x9Lines(self): - lines = {} - lines[0] = [4,0,4,8] - lines[11.25] = [5,0,3,8] - lines[22.5] = [6,0,2,8] - lines[33.75] = [7,0,1,8] - lines[45] = [8,0,0,8] - lines[56.25] = [8,1,0,7] - lines[67.5] = [8,2,0,6] - lines[78.75] = [8,3,0,5] - lines[90] = [8,4,0,4] - lines[101.25] = [0,3,8,5] - lines[112.5] = [0,2,8,6] - lines[123.75] = [0,1,8,7] - lines[135] = [0,0,8,8] - lines[146.25] = [1,0,7,8] - lines[157.5] = [2,0,6,8] - lines[168.75] = [3,0,5,8] - self.lines[9] = lines - return - -lineLengths =[3,5,7,9] -lineTypes = ["full", "right", "left"] - -lineDict = LineDictionary() - -def LinearMotionBlur_random(img): - lineLengthIdx = np.random.randint(0, len(lineLengths)) - lineTypeIdx = np.random.randint(0, len(lineTypes)) - lineLength = lineLengths[lineLengthIdx] - lineType = lineTypes[lineTypeIdx] - lineAngle = randomAngle(lineLength) - return LinearMotionBlur(img, lineLength, lineAngle, lineType) - -def LinearMotionBlur(img, dim, angle, linetype='full'): - if len(img.shape) == 2: - h, w = img.shape - c = 1 - img = img[...,np.newaxis] - elif len(img.shape) == 3: - h,w,c = img.shape - else: - raise ValueError('unsupported img.shape') - - kernel = LineKernel(dim, angle, linetype) - - imgs = [] - for i in range(c): - imgs.append ( convolve2d(img[...,i], kernel, mode='same') ) - - img = np.stack(imgs, axis=-1) - img = np.squeeze(img) - return img - -def LineKernel(dim, angle, linetype): - kernelwidth = dim - kernelCenter = int(math.floor(dim/2)) - angle = SanitizeAngleValue(kernelCenter, angle) - kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32) - lineAnchors = lineDict.lines[dim][angle] - if(linetype == 'right'): - lineAnchors[0] = kernelCenter - lineAnchors[1] = kernelCenter - if(linetype == 'left'): - lineAnchors[2] = kernelCenter - lineAnchors[3] = kernelCenter - rr,cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3]) - kernel[rr,cc]=1 - normalizationFactor = np.count_nonzero(kernel) - kernel = kernel / normalizationFactor - return kernel - -def SanitizeAngleValue(kernelCenter, angle): - numDistinctLines = kernelCenter * 4 - angle = math.fmod(angle, 180.0) - validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) - angle = nearestValue(angle, validLineAngles) - return angle - -def nearestValue(theta, validAngles): - idx = (np.abs(validAngles-theta)).argmin() - return validAngles[idx] - -def randomAngle(kerneldim): - kernelCenter = int(math.floor(kerneldim/2)) - numDistinctLines = kernelCenter * 4 - validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) - angleIdx = np.random.randint(0, len(validLineAngles)) +import math +import numpy as np +from PIL import Image +from scipy.signal import convolve2d +from skimage.draw import line + +class LineDictionary: + def __init__(self): + self.lines = {} + self.Create3x3Lines() + self.Create5x5Lines() + self.Create7x7Lines() + self.Create9x9Lines() 
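
For reference, `LineKernel` rasterizes a one-pixel line across a dim x dim grid at the nearest angle present in `lineDict` and normalizes it to unit sum, so a 5x5 kernel at 45 degrees is an anti-diagonal of five 0.2 entries. One caveat worth noting: for the 'left'/'right' line types, `lineAnchors` aliases the shared list stored in `lineDict`, so the in-place edits permanently truncate that dictionary entry for later calls; taking a copy of the list first would avoid this. A small worked example, assuming the package imports cleanly:

import numpy as np
from imagelib.blur import LineKernel, LinearMotionBlur

k = LineKernel(5, 45, 'full')          # anti-diagonal line, entries 1/5
assert k.shape == (5, 5) and np.isclose(k.sum(), 1.0)

# Angles snap to the nearest entry for that kernel size: with dim=5 the
# valid angles are 0, 22.5, ..., 157.5, so 50 degrees quantizes to 45.
img = np.random.rand(64, 64, 3).astype(np.float32)
blurred = LinearMotionBlur(img, dim=5, angle=50, linetype='full')
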
+ return + + def Create3x3Lines(self): + lines = {} + lines[0] = [1,0,1,2] + lines[45] = [2,0,0,2] + lines[90] = [0,1,2,1] + lines[135] = [0,0,2,2] + self.lines[3] = lines + return + + def Create5x5Lines(self): + lines = {} + lines[0] = [2,0,2,4] + lines[22.5] = [3,0,1,4] + lines[45] = [0,4,4,0] + lines[67.5] = [0,3,4,1] + lines[90] = [0,2,4,2] + lines[112.5] = [0,1,4,3] + lines[135] = [0,0,4,4] + lines[157.5]= [1,0,3,4] + self.lines[5] = lines + return + + def Create7x7Lines(self): + lines = {} + lines[0] = [3,0,3,6] + lines[15] = [4,0,2,6] + lines[30] = [5,0,1,6] + lines[45] = [6,0,0,6] + lines[60] = [6,1,0,5] + lines[75] = [6,2,0,4] + lines[90] = [0,3,6,3] + lines[105] = [0,2,6,4] + lines[120] = [0,1,6,5] + lines[135] = [0,0,6,6] + lines[150] = [1,0,5,6] + lines[165] = [2,0,4,6] + self.lines[7] = lines + return + + def Create9x9Lines(self): + lines = {} + lines[0] = [4,0,4,8] + lines[11.25] = [5,0,3,8] + lines[22.5] = [6,0,2,8] + lines[33.75] = [7,0,1,8] + lines[45] = [8,0,0,8] + lines[56.25] = [8,1,0,7] + lines[67.5] = [8,2,0,6] + lines[78.75] = [8,3,0,5] + lines[90] = [8,4,0,4] + lines[101.25] = [0,3,8,5] + lines[112.5] = [0,2,8,6] + lines[123.75] = [0,1,8,7] + lines[135] = [0,0,8,8] + lines[146.25] = [1,0,7,8] + lines[157.5] = [2,0,6,8] + lines[168.75] = [3,0,5,8] + self.lines[9] = lines + return + +lineLengths =[3,5,7,9] +lineTypes = ["full", "right", "left"] + +lineDict = LineDictionary() + +def LinearMotionBlur_random(img): + lineLengthIdx = np.random.randint(0, len(lineLengths)) + lineTypeIdx = np.random.randint(0, len(lineTypes)) + lineLength = lineLengths[lineLengthIdx] + lineType = lineTypes[lineTypeIdx] + lineAngle = randomAngle(lineLength) + return LinearMotionBlur(img, lineLength, lineAngle, lineType) + +def LinearMotionBlur(img, dim, angle, linetype='full'): + if len(img.shape) == 2: + h, w = img.shape + c = 1 + img = img[...,np.newaxis] + elif len(img.shape) == 3: + h,w,c = img.shape + else: + raise ValueError('unsupported img.shape') + + kernel = LineKernel(dim, angle, linetype) + + imgs = [] + for i in range(c): + imgs.append ( convolve2d(img[...,i], kernel, mode='same') ) + + img = np.stack(imgs, axis=-1) + img = np.squeeze(img) + return img + +def LineKernel(dim, angle, linetype): + kernelwidth = dim + kernelCenter = int(math.floor(dim/2)) + angle = SanitizeAngleValue(kernelCenter, angle) + kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32) + lineAnchors = lineDict.lines[dim][angle] + if(linetype == 'right'): + lineAnchors[0] = kernelCenter + lineAnchors[1] = kernelCenter + if(linetype == 'left'): + lineAnchors[2] = kernelCenter + lineAnchors[3] = kernelCenter + rr,cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3]) + kernel[rr,cc]=1 + normalizationFactor = np.count_nonzero(kernel) + kernel = kernel / normalizationFactor + return kernel + +def SanitizeAngleValue(kernelCenter, angle): + numDistinctLines = kernelCenter * 4 + angle = math.fmod(angle, 180.0) + validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) + angle = nearestValue(angle, validLineAngles) + return angle + +def nearestValue(theta, validAngles): + idx = (np.abs(validAngles-theta)).argmin() + return validAngles[idx] + +def randomAngle(kerneldim): + kernelCenter = int(math.floor(kerneldim/2)) + numDistinctLines = kernelCenter * 4 + validLineAngles = np.linspace(0,180, numDistinctLines, endpoint = False) + angleIdx = np.random.randint(0, len(validLineAngles)) return int(validLineAngles[angleIdx]) \ No newline at end of file diff --git 
a/imagelib/color_transfer.py b/imagelib/color_transfer.py index 1a7f1dd..eb66074 100644 --- a/imagelib/color_transfer.py +++ b/imagelib/color_transfer.py @@ -1,191 +1,191 @@ -import numpy as np -import cv2 - -def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None): - """ - Transfers the color distribution from the source to the target - image using the mean and standard deviations of the L*a*b* - color space. - - This implementation is (loosely) based on to the "Color Transfer - between Images" paper by Reinhard et al., 2001. - - Parameters: - ------- - source: NumPy array - OpenCV image in BGR color space (the source image) - target: NumPy array - OpenCV image in BGR color space (the target image) - clip: Should components of L*a*b* image be scaled by np.clip before - converting back to BGR color space? - If False then components will be min-max scaled appropriately. - Clipping will keep target image brightness truer to the input. - Scaling will adjust image brightness to avoid washed out portions - in the resulting color transfer that can be caused by clipping. - preserve_paper: Should color transfer strictly follow methodology - layed out in original paper? The method does not always produce - aesthetically pleasing results. - If False then L*a*b* components will scaled using the reciprocal of - the scaling factor proposed in the paper. This method seems to produce - more consistently aesthetically pleasing results - - Returns: - ------- - transfer: NumPy array - OpenCV image (w, h, 3) NumPy array (uint8) - """ - - - # convert the images from the RGB to L*ab* color space, being - # sure to utilizing the floating point data type (note: OpenCV - # expects floats to be 32-bit, so use that instead of 64-bit) - source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32) - target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32) - - # compute color statistics for the source and target images - src_input = source if source_mask is None else source*source_mask - tgt_input = target if target_mask is None else target*target_mask - (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = lab_image_stats(src_input) - (lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = lab_image_stats(tgt_input) - - # subtract the means from the target image - (l, a, b) = cv2.split(target) - l -= lMeanTar - a -= aMeanTar - b -= bMeanTar - - if preserve_paper: - # scale by the standard deviations using paper proposed factor - l = (lStdTar / lStdSrc) * l - a = (aStdTar / aStdSrc) * a - b = (bStdTar / bStdSrc) * b - else: - # scale by the standard deviations using reciprocal of paper proposed factor - l = (lStdSrc / lStdTar) * l - a = (aStdSrc / aStdTar) * a - b = (bStdSrc / bStdTar) * b - - # add in the source mean - l += lMeanSrc - a += aMeanSrc - b += bMeanSrc - - # clip/scale the pixel intensities to [0, 255] if they fall - # outside this range - l = _scale_array(l, clip=clip) - a = _scale_array(a, clip=clip) - b = _scale_array(b, clip=clip) - - # merge the channels together and convert back to the RGB color - # space, being sure to utilize the 8-bit unsigned integer data - # type - transfer = cv2.merge([l, a, b]) - transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR) - - # return the color transferred image - return transfer - -def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5): - ''' - Matches the colour distribution of the target image to that of the source image - using a linear 
transform. - Images are expected to be of form (w,h,c) and float in [0,1]. - Modes are chol, pca or sym for different choices of basis. - ''' - mu_t = target_img.mean(0).mean(0) - t = target_img - mu_t - t = t.transpose(2,0,1).reshape(3,-1) - Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0]) - mu_s = source_img.mean(0).mean(0) - s = source_img - mu_s - s = s.transpose(2,0,1).reshape(3,-1) - Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0]) - if mode == 'chol': - chol_t = np.linalg.cholesky(Ct) - chol_s = np.linalg.cholesky(Cs) - ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t) - if mode == 'pca': - eva_t, eve_t = np.linalg.eigh(Ct) - Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) - eva_s, eve_s = np.linalg.eigh(Cs) - Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T) - ts = Qs.dot(np.linalg.inv(Qt)).dot(t) - if mode == 'sym': - eva_t, eve_t = np.linalg.eigh(Ct) - Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) - Qt_Cs_Qt = Qt.dot(Cs).dot(Qt) - eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt) - QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T) - ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t) - matched_img = ts.reshape(*target_img.transpose(2,0,1).shape).transpose(1,2,0) - matched_img += mu_s - matched_img[matched_img>1] = 1 - matched_img[matched_img<0] = 0 - return matched_img - -def lab_image_stats(image): - # compute the mean and standard deviation of each channel - (l, a, b) = cv2.split(image) - (lMean, lStd) = (l.mean(), l.std()) - (aMean, aStd) = (a.mean(), a.std()) - (bMean, bStd) = (b.mean(), b.std()) - - # return the color statistics - return (lMean, lStd, aMean, aStd, bMean, bStd) - -def _scale_array(arr, clip=True): - if clip: - return np.clip(arr, 0, 255) - - mn = arr.min() - mx = arr.max() - scale_range = (max([mn, 0]), min([mx, 255])) - - if mn < scale_range[0] or mx > scale_range[1]: - return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0] - - return arr - -def channel_hist_match(source, template, hist_match_threshold=255, mask=None): - # Code borrowed from: - # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x - masked_source = source - masked_template = template - - if mask is not None: - masked_source = source * mask - masked_template = template * mask - - oldshape = source.shape - source = source.ravel() - template = template.ravel() - masked_source = masked_source.ravel() - masked_template = masked_template.ravel() - s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, - return_counts=True) - t_values, t_counts = np.unique(template, return_counts=True) - ms_values, mbin_idx, ms_counts = np.unique(source, return_inverse=True, - return_counts=True) - mt_values, mt_counts = np.unique(template, return_counts=True) - - s_quantiles = np.cumsum(s_counts).astype(np.float64) - s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1] - t_quantiles = np.cumsum(t_counts).astype(np.float64) - t_quantiles = 255 * t_quantiles / t_quantiles[-1] - interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) - - return interp_t_values[bin_idx].reshape(oldshape) - -def color_hist_match(src_im, tar_im, hist_match_threshold=255): - h,w,c = src_im.shape - matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], hist_match_threshold, None) - matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], hist_match_threshold, None) - matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], hist_match_threshold, None) - - to_stack 
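
Worth flagging for callers of `reinhard_color_transfer`: despite the docstring listing `source` first, the signature takes the image to be recolored (`target`) as the first argument, and both inputs are expected as uint8 BGR images. A usage sketch with hypothetical file names:

import cv2
from imagelib import reinhard_color_transfer

target = cv2.imread('dst_face.jpg')   # image whose colors get replaced
source = cv2.imread('src_face.jpg')   # image providing the color statistics

# clip=True keeps brightness truer to the input; clip=False rescales
# out-of-range L*a*b* values instead of clipping them.
out = reinhard_color_transfer(target, source, clip=False, preserve_paper=False)
cv2.imwrite('result.jpg', out)
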
= (matched_R, matched_G, matched_B) - for i in range(3, c): - to_stack += ( src_im[:,:,i],) - - - matched = np.stack(to_stack, axis=-1).astype(src_im.dtype) - return matched +import numpy as np +import cv2 + +def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None): + """ + Transfers the color distribution from the source to the target + image using the mean and standard deviations of the L*a*b* + color space. + + This implementation is (loosely) based on to the "Color Transfer + between Images" paper by Reinhard et al., 2001. + + Parameters: + ------- + source: NumPy array + OpenCV image in BGR color space (the source image) + target: NumPy array + OpenCV image in BGR color space (the target image) + clip: Should components of L*a*b* image be scaled by np.clip before + converting back to BGR color space? + If False then components will be min-max scaled appropriately. + Clipping will keep target image brightness truer to the input. + Scaling will adjust image brightness to avoid washed out portions + in the resulting color transfer that can be caused by clipping. + preserve_paper: Should color transfer strictly follow methodology + layed out in original paper? The method does not always produce + aesthetically pleasing results. + If False then L*a*b* components will scaled using the reciprocal of + the scaling factor proposed in the paper. This method seems to produce + more consistently aesthetically pleasing results + + Returns: + ------- + transfer: NumPy array + OpenCV image (w, h, 3) NumPy array (uint8) + """ + + + # convert the images from the RGB to L*ab* color space, being + # sure to utilizing the floating point data type (note: OpenCV + # expects floats to be 32-bit, so use that instead of 64-bit) + source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32) + target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32) + + # compute color statistics for the source and target images + src_input = source if source_mask is None else source*source_mask + tgt_input = target if target_mask is None else target*target_mask + (lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = lab_image_stats(src_input) + (lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = lab_image_stats(tgt_input) + + # subtract the means from the target image + (l, a, b) = cv2.split(target) + l -= lMeanTar + a -= aMeanTar + b -= bMeanTar + + if preserve_paper: + # scale by the standard deviations using paper proposed factor + l = (lStdTar / lStdSrc) * l + a = (aStdTar / aStdSrc) * a + b = (bStdTar / bStdSrc) * b + else: + # scale by the standard deviations using reciprocal of paper proposed factor + l = (lStdSrc / lStdTar) * l + a = (aStdSrc / aStdTar) * a + b = (bStdSrc / bStdTar) * b + + # add in the source mean + l += lMeanSrc + a += aMeanSrc + b += bMeanSrc + + # clip/scale the pixel intensities to [0, 255] if they fall + # outside this range + l = _scale_array(l, clip=clip) + a = _scale_array(a, clip=clip) + b = _scale_array(b, clip=clip) + + # merge the channels together and convert back to the RGB color + # space, being sure to utilize the 8-bit unsigned integer data + # type + transfer = cv2.merge([l, a, b]) + transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR) + + # return the color transferred image + return transfer + +def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5): + ''' + Matches the colour distribution of the target image to that of the source image + using a linear 
transform. + Images are expected to be of form (w,h,c) and float in [0,1]. + Modes are chol, pca or sym for different choices of basis. + ''' + mu_t = target_img.mean(0).mean(0) + t = target_img - mu_t + t = t.transpose(2,0,1).reshape(3,-1) + Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0]) + mu_s = source_img.mean(0).mean(0) + s = source_img - mu_s + s = s.transpose(2,0,1).reshape(3,-1) + Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0]) + if mode == 'chol': + chol_t = np.linalg.cholesky(Ct) + chol_s = np.linalg.cholesky(Cs) + ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t) + if mode == 'pca': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + eva_s, eve_s = np.linalg.eigh(Cs) + Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T) + ts = Qs.dot(np.linalg.inv(Qt)).dot(t) + if mode == 'sym': + eva_t, eve_t = np.linalg.eigh(Ct) + Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T) + Qt_Cs_Qt = Qt.dot(Cs).dot(Qt) + eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt) + QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T) + ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t) + matched_img = ts.reshape(*target_img.transpose(2,0,1).shape).transpose(1,2,0) + matched_img += mu_s + matched_img[matched_img>1] = 1 + matched_img[matched_img<0] = 0 + return matched_img + +def lab_image_stats(image): + # compute the mean and standard deviation of each channel + (l, a, b) = cv2.split(image) + (lMean, lStd) = (l.mean(), l.std()) + (aMean, aStd) = (a.mean(), a.std()) + (bMean, bStd) = (b.mean(), b.std()) + + # return the color statistics + return (lMean, lStd, aMean, aStd, bMean, bStd) + +def _scale_array(arr, clip=True): + if clip: + return np.clip(arr, 0, 255) + + mn = arr.min() + mx = arr.max() + scale_range = (max([mn, 0]), min([mx, 255])) + + if mn < scale_range[0] or mx > scale_range[1]: + return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0] + + return arr + +def channel_hist_match(source, template, hist_match_threshold=255, mask=None): + # Code borrowed from: + # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x + masked_source = source + masked_template = template + + if mask is not None: + masked_source = source * mask + masked_template = template * mask + + oldshape = source.shape + source = source.ravel() + template = template.ravel() + masked_source = masked_source.ravel() + masked_template = masked_template.ravel() + s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, + return_counts=True) + t_values, t_counts = np.unique(template, return_counts=True) + ms_values, mbin_idx, ms_counts = np.unique(source, return_inverse=True, + return_counts=True) + mt_values, mt_counts = np.unique(template, return_counts=True) + + s_quantiles = np.cumsum(s_counts).astype(np.float64) + s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1] + t_quantiles = np.cumsum(t_counts).astype(np.float64) + t_quantiles = 255 * t_quantiles / t_quantiles[-1] + interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) + + return interp_t_values[bin_idx].reshape(oldshape) + +def color_hist_match(src_im, tar_im, hist_match_threshold=255): + h,w,c = src_im.shape + matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], hist_match_threshold, None) + matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], hist_match_threshold, None) + matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], hist_match_threshold, None) + + to_stack 
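
In the 'pca' branch of `linear_color_transfer` the mapping is ts = Qs . Qt^-1 . t, where Qt and Qs are the symmetric square roots of the target and source channel covariances, so by construction the remapped pixels inherit the source covariance (Qs Qt^-1 Ct Qt^-1 Qs = Cs). A quick sanity check of that property, accurate up to the eps regularizer and the final clipping:

import numpy as np
from imagelib import linear_color_transfer

rng = np.random.RandomState(0)
tgt = rng.rand(64, 64, 3).astype(np.float32)
src = (rng.rand(64, 64, 3) * 0.5 + 0.25).astype(np.float32)

out = linear_color_transfer(tgt, src, mode='pca')

def channel_cov(img):
    x = img.reshape(-1, 3) - img.reshape(-1, 3).mean(0)
    return x.T @ x / x.shape[0]

# Should be small: the output covariance tracks the source covariance.
print(np.abs(channel_cov(out) - channel_cov(src)).max())
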
= (matched_R, matched_G, matched_B) + for i in range(3, c): + to_stack += ( src_im[:,:,i],) + + + matched = np.stack(to_stack, axis=-1).astype(src_im.dtype) + return matched diff --git a/imagelib/common.py b/imagelib/common.py index e63c998..229387f 100644 --- a/imagelib/common.py +++ b/imagelib/common.py @@ -1,21 +1,21 @@ -import numpy as np - -def normalize_channels(img, target_channels): - img_shape_len = len(img.shape) - if img_shape_len == 2: - h, w = img.shape - c = 0 - elif img_shape_len == 3: - h, w, c = img.shape - else: - raise ValueError("normalize: incorrect image dimensions.") - - if c == 0 and target_channels > 0: - img = img[...,np.newaxis] - if c == 1 and target_channels > 1: - img = np.repeat (img, target_channels, -1) - if c > target_channels: - img = img[...,0:target_channels] - c = target_channels - +import numpy as np + +def normalize_channels(img, target_channels): + img_shape_len = len(img.shape) + if img_shape_len == 2: + h, w = img.shape + c = 0 + elif img_shape_len == 3: + h, w, c = img.shape + else: + raise ValueError("normalize: incorrect image dimensions.") + + if c == 0 and target_channels > 0: + img = img[...,np.newaxis] + if c == 1 and target_channels > 1: + img = np.repeat (img, target_channels, -1) + if c > target_channels: + img = img[...,0:target_channels] + c = target_channels + return img \ No newline at end of file diff --git a/imagelib/draw.py b/imagelib/draw.py index c87dc0a..3de1191 100644 --- a/imagelib/draw.py +++ b/imagelib/draw.py @@ -1,13 +1,13 @@ -import numpy as np -import cv2 - -def draw_polygon (image, points, color, thickness = 1): - points_len = len(points) - for i in range (0, points_len): - p0 = tuple( points[i] ) - p1 = tuple( points[ (i+1) % points_len] ) - cv2.line (image, p0, p1, color, thickness=thickness) - -def draw_rect(image, rect, color, thickness=1): - l,t,r,b = rect - draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness) +import numpy as np +import cv2 + +def draw_polygon (image, points, color, thickness = 1): + points_len = len(points) + for i in range (0, points_len): + p0 = tuple( points[i] ) + p1 = tuple( points[ (i+1) % points_len] ) + cv2.line (image, p0, p1, color, thickness=thickness) + +def draw_rect(image, rect, color, thickness=1): + l,t,r,b = rect + draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness) diff --git a/imagelib/equalize_and_stack_square.py b/imagelib/equalize_and_stack_square.py index e25612c..31c435a 100644 --- a/imagelib/equalize_and_stack_square.py +++ b/imagelib/equalize_and_stack_square.py @@ -1,45 +1,45 @@ -import numpy as np -import cv2 - -def equalize_and_stack_square (images, axis=1): - max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] ) - - target_wh = 99999 - for i,image in enumerate(images): - if len(image.shape) == 2: - h,w = image.shape - c = 1 - else: - h,w,c = image.shape - - if h < target_wh: - target_wh = h - - if w < target_wh: - target_wh = w - - for i,image in enumerate(images): - if len(image.shape) == 2: - h,w = image.shape - c = 1 - else: - h,w,c = image.shape - - if c < max_c: - if c == 1: - if len(image.shape) == 2: - image = np.expand_dims ( image, -1 ) - image = np.concatenate ( (image,)*max_c, -1 ) - elif c == 2: #GA - image = np.expand_dims ( image[...,0], -1 ) - image = np.concatenate ( (image,)*max_c, -1 ) - else: - image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 ) - - if h != target_wh or w != target_wh: - image = cv2.resize ( image, (target_wh, target_wh) ) - h,w,c = image.shape - - 
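
One behavior of `normalize_channels` above that may be unintended: after the `np.newaxis` branch, `c` is never updated to 1, so a 2-D input gains only a singleton channel axis and is not repeated out to `target_channels`; setting `c = 1` inside that branch would presumably be needed for grayscale expansion. A short demonstration:

import numpy as np
from imagelib import normalize_channels

print(normalize_channels(np.zeros((8, 8)), 3).shape)     # (8, 8, 1), not (8, 8, 3)
print(normalize_channels(np.zeros((8, 8, 1)), 3).shape)  # (8, 8, 3) as expected
print(normalize_channels(np.zeros((8, 8, 4)), 3).shape)  # (8, 8, 3), truncated
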
images[i] = image - +import numpy as np +import cv2 + +def equalize_and_stack_square (images, axis=1): + max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] ) + + target_wh = 99999 + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if h < target_wh: + target_wh = h + + if w < target_wh: + target_wh = w + + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if c < max_c: + if c == 1: + if len(image.shape) == 2: + image = np.expand_dims ( image, -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + elif c == 2: #GA + image = np.expand_dims ( image[...,0], -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + else: + image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 ) + + if h != target_wh or w != target_wh: + image = cv2.resize ( image, (target_wh, target_wh) ) + h,w,c = image.shape + + images[i] = image + return np.concatenate ( images, axis = 1 ) \ No newline at end of file diff --git a/imagelib/estimate_sharpness.py b/imagelib/estimate_sharpness.py index fbe5b91..01ef0b7 100644 --- a/imagelib/estimate_sharpness.py +++ b/imagelib/estimate_sharpness.py @@ -1,277 +1,277 @@ -""" -Copyright (c) 2009-2010 Arizona Board of Regents. All Rights Reserved. - Contact: Lina Karam (karam@asu.edu) and Niranjan Narvekar (nnarveka@asu.edu) - Image, Video, and Usabilty (IVU) Lab, http://ivulab.asu.edu , Arizona State University - This copyright statement may not be removed from any file containing it or from modifications to these files. - This copyright notice must also be included in any file or product that is derived from the source files. - - Redistribution and use of this code in source and binary forms, with or without modification, are permitted provided that the - following conditions are met: - - Redistribution's of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - Redistribution's in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the distribution. - - The Image, Video, and Usability Laboratory (IVU Lab, http://ivulab.asu.edu) is acknowledged in any publication that - reports research results using this code, copies of this code, or modifications of this code. - The code and our papers are to be cited in the bibliography as: - -N. D. Narvekar and L. J. Karam, "CPBD Sharpness Metric Software", http://ivulab.asu.edu/Quality/CPBD - -N. D. Narvekar and L. J. Karam, "A No-Reference Image Blur Metric Based on the Cumulative -Probability of Blur Detection (CPBD)," accepted and to appear in the IEEE Transactions on Image Processing, 2011. - -N. D. Narvekar and L. J. Karam, "An Improved No-Reference Sharpness Metric Based on the Probability of Blur Detection," International Workshop on Video Processing and Quality Metrics for Consumer Electronics (VPQM), January 2010, http://www.vpqm.org (pdf) - -N. D. Narvekar and L. J. Karam, "A No Reference Perceptual Quality Metric based on Cumulative Probability of Blur Detection," First International Workshop on the Quality of Multimedia Experience (QoMEX), pp. 87-91, July 2009. 
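
The metric in this file follows the cited CPBD papers: for each detected edge, the probability of perceiving blur is P = 1 - exp(-(w / w_JNB)^BETA), where w is the measured edge width, BETA = 3.6, and the just-noticeable blur width w_JNB is 5 for low-contrast blocks (contrast < 51) and 3 otherwise; the final score is the share of edges whose blur probability stays at or below the 63% detection point. A tiny worked computation of the per-edge probability:

import numpy as np

BETA = 3.6

def prob_blur(width, w_jnb):
    # Probability of detecting blur at a single edge (CPBD model).
    return 1 - np.exp(-abs(width / w_jnb) ** BETA)

print(prob_blur(3.0, 3))   # ~0.632: width right at the just-noticeable point
print(prob_blur(6.0, 3))   # ~1.0:   clearly blurred edge
print(prob_blur(1.5, 3))   # ~0.079: sharp edge
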
- - DISCLAIMER: - This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the Arizona Board of Regents, Arizona State University, IVU Lab members, authors or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute -goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. -""" - -import numpy as np -import cv2 -from math import atan2, pi -from scipy.ndimage import convolve -from skimage.filters.edges import HSOBEL_WEIGHTS -from skimage.feature import canny - -def sobel(image): - # type: (numpy.ndarray) -> numpy.ndarray - """ - Find edges using the Sobel approximation to the derivatives. - - Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l196). - """ - - h1 = np.array(HSOBEL_WEIGHTS) - h1 /= np.sum(abs(h1)) # normalize h1 - - strength2 = np.square(convolve(image, h1.T)) - - # Note: https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l59 - thresh2 = 2 * np.sqrt(np.mean(strength2)) - - strength2[strength2 <= thresh2] = 0 - return _simple_thinning(strength2) - - -def _simple_thinning(strength): - # type: (numpy.ndarray) -> numpy.ndarray - """ - Perform a very simple thinning. - - Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l512). - """ - num_rows, num_cols = strength.shape - - zero_column = np.zeros((num_rows, 1)) - zero_row = np.zeros((1, num_cols)) - - x = ( - (strength > np.c_[zero_column, strength[:, :-1]]) & - (strength > np.c_[strength[:, 1:], zero_column]) - ) - - y = ( - (strength > np.r_[zero_row, strength[:-1, :]]) & - (strength > np.r_[strength[1:, :], zero_row]) - ) - - return x | y - - - - - -# threshold to characterize blocks as edge/non-edge blocks -THRESHOLD = 0.002 -# fitting parameter -BETA = 3.6 -# block size -BLOCK_HEIGHT, BLOCK_WIDTH = (64, 64) -# just noticeable widths based on the perceptual experiments -WIDTH_JNB = np.concatenate([5*np.ones(51), 3*np.ones(205)]) - - -def compute(image): - # type: (numpy.ndarray) -> float - """Compute the sharpness metric for the given data.""" - - # convert the image to double for further processing - image = image.astype(np.float64) - - # edge detection using canny and sobel canny edge detection is done to - # classify the blocks as edge or non-edge blocks and sobel edge - # detection is done for the purpose of edge width measurement. - canny_edges = canny(image) - sobel_edges = sobel(image) - - # edge width calculation - marziliano_widths = marziliano_method(sobel_edges, image) - - # sharpness metric calculation - return _calculate_sharpness_metric(image, canny_edges, marziliano_widths) - - -def marziliano_method(edges, image): - # type: (numpy.ndarray, numpy.ndarray) -> numpy.ndarray - """ - Calculate the widths of the given edges. - - :return: A matrix with the same dimensions as the given image with 0's at - non-edge locations and edge-widths at the edge locations. - """ - - # `edge_widths` consists of zero and non-zero values. 
A zero value - # indicates that there is no edge at that position and a non-zero value - # indicates that there is an edge at that position and the value itself - # gives the edge width. - edge_widths = np.zeros(image.shape) - - # find the gradient for the image - gradient_y, gradient_x = np.gradient(image) - - # dimensions of the image - img_height, img_width = image.shape - - # holds the angle information of the edges - edge_angles = np.zeros(image.shape) - - # calculate the angle of the edges - for row in range(img_height): - for col in range(img_width): - if gradient_x[row, col] != 0: - edge_angles[row, col] = atan2(gradient_y[row, col], gradient_x[row, col]) * (180 / pi) - elif gradient_x[row, col] == 0 and gradient_y[row, col] == 0: - edge_angles[row,col] = 0 - elif gradient_x[row, col] == 0 and gradient_y[row, col] == pi/2: - edge_angles[row, col] = 90 - - - if np.any(edge_angles): - - # quantize the angle - quantized_angles = 45 * np.round(edge_angles / 45) - - for row in range(1, img_height - 1): - for col in range(1, img_width - 1): - if edges[row, col] == 1: - - # gradient angle = 180 or -180 - if quantized_angles[row, col] == 180 or quantized_angles[row, col] == -180: - for margin in range(100 + 1): - inner_border = (col - 1) - margin - outer_border = (col - 2) - margin - - # outside image or intensity increasing from left to right - if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) <= 0: - break - - width_left = margin + 1 - - for margin in range(100 + 1): - inner_border = (col + 1) + margin - outer_border = (col + 2) + margin - - # outside image or intensity increasing from left to right - if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) >= 0: - break - - width_right = margin + 1 - - edge_widths[row, col] = width_left + width_right - - - # gradient angle = 0 - if quantized_angles[row, col] == 0: - for margin in range(100 + 1): - inner_border = (col - 1) - margin - outer_border = (col - 2) - margin - - # outside image or intensity decreasing from left to right - if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) >= 0: - break - - width_left = margin + 1 - - for margin in range(100 + 1): - inner_border = (col + 1) + margin - outer_border = (col + 2) + margin - - # outside image or intensity decreasing from left to right - if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) <= 0: - break - - width_right = margin + 1 - - edge_widths[row, col] = width_right + width_left - - return edge_widths - - -def _calculate_sharpness_metric(image, edges, edge_widths): - # type: (numpy.array, numpy.array, numpy.array) -> numpy.float64 - - # get the size of image - img_height, img_width = image.shape - - total_num_edges = 0 - hist_pblur = np.zeros(101) - - # maximum block indices - num_blocks_vertically = int(img_height / BLOCK_HEIGHT) - num_blocks_horizontally = int(img_width / BLOCK_WIDTH) - - # loop over the blocks - for i in range(num_blocks_vertically): - for j in range(num_blocks_horizontally): - - # get the row and col indices for the block pixel positions - rows = slice(BLOCK_HEIGHT * i, BLOCK_HEIGHT * (i + 1)) - cols = slice(BLOCK_WIDTH * j, BLOCK_WIDTH * (j + 1)) - - if is_edge_block(edges[rows, cols], THRESHOLD): - block_widths = edge_widths[rows, cols] - # rotate block to simulate column-major boolean indexing - block_widths = np.rot90(np.flipud(block_widths), 3) - block_widths = block_widths[block_widths != 0] - - block_contrast = 
get_block_contrast(image[rows, cols]) - block_jnb = WIDTH_JNB[block_contrast] - - # calculate the probability of blur detection at the edges - # detected in the block - prob_blur_detection = 1 - np.exp(-abs(block_widths/block_jnb) ** BETA) - - # update the statistics using the block information - for probability in prob_blur_detection: - bucket = int(round(probability * 100)) - hist_pblur[bucket] += 1 - total_num_edges += 1 - - # normalize the pdf - if total_num_edges > 0: - hist_pblur = hist_pblur / total_num_edges - - # calculate the sharpness metric - return np.sum(hist_pblur[:64]) - - -def is_edge_block(block, threshold): - # type: (numpy.ndarray, float) -> bool - """Decide whether the given block is an edge block.""" - return np.count_nonzero(block) > (block.size * threshold) - - -def get_block_contrast(block): - # type: (numpy.ndarray) -> int - return int(np.max(block) - np.min(block)) - - -def estimate_sharpness(image): - height, width = image.shape[:2] - - if image.ndim == 3: - image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - - return compute(image) +""" +Copyright (c) 2009-2010 Arizona Board of Regents. All Rights Reserved. + Contact: Lina Karam (karam@asu.edu) and Niranjan Narvekar (nnarveka@asu.edu) + Image, Video, and Usabilty (IVU) Lab, http://ivulab.asu.edu , Arizona State University + This copyright statement may not be removed from any file containing it or from modifications to these files. + This copyright notice must also be included in any file or product that is derived from the source files. + + Redistribution and use of this code in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + - Redistribution's of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + - Redistribution's in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the distribution. + - The Image, Video, and Usability Laboratory (IVU Lab, http://ivulab.asu.edu) is acknowledged in any publication that + reports research results using this code, copies of this code, or modifications of this code. + The code and our papers are to be cited in the bibliography as: + +N. D. Narvekar and L. J. Karam, "CPBD Sharpness Metric Software", http://ivulab.asu.edu/Quality/CPBD + +N. D. Narvekar and L. J. Karam, "A No-Reference Image Blur Metric Based on the Cumulative +Probability of Blur Detection (CPBD)," accepted and to appear in the IEEE Transactions on Image Processing, 2011. + +N. D. Narvekar and L. J. Karam, "An Improved No-Reference Sharpness Metric Based on the Probability of Blur Detection," International Workshop on Video Processing and Quality Metrics for Consumer Electronics (VPQM), January 2010, http://www.vpqm.org (pdf) + +N. D. Narvekar and L. J. Karam, "A No Reference Perceptual Quality Metric based on Cumulative Probability of Blur Detection," First International Workshop on the Quality of Multimedia Experience (QoMEX), pp. 87-91, July 2009. + + DISCLAIMER: + This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. 
In no event shall the Arizona Board of Regents, Arizona State University, IVU Lab members, authors or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute +goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. +""" + +import numpy as np +import cv2 +from math import atan2, pi +from scipy.ndimage import convolve +from skimage.filters.edges import HSOBEL_WEIGHTS +from skimage.feature import canny + +def sobel(image): + # type: (numpy.ndarray) -> numpy.ndarray + """ + Find edges using the Sobel approximation to the derivatives. + + Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l196). + """ + + h1 = np.array(HSOBEL_WEIGHTS) + h1 /= np.sum(abs(h1)) # normalize h1 + + strength2 = np.square(convolve(image, h1.T)) + + # Note: https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l59 + thresh2 = 2 * np.sqrt(np.mean(strength2)) + + strength2[strength2 <= thresh2] = 0 + return _simple_thinning(strength2) + + +def _simple_thinning(strength): + # type: (numpy.ndarray) -> numpy.ndarray + """ + Perform a very simple thinning. + + Inspired by the [Octave implementation](https://sourceforge.net/p/octave/image/ci/default/tree/inst/edge.m#l512). + """ + num_rows, num_cols = strength.shape + + zero_column = np.zeros((num_rows, 1)) + zero_row = np.zeros((1, num_cols)) + + x = ( + (strength > np.c_[zero_column, strength[:, :-1]]) & + (strength > np.c_[strength[:, 1:], zero_column]) + ) + + y = ( + (strength > np.r_[zero_row, strength[:-1, :]]) & + (strength > np.r_[strength[1:, :], zero_row]) + ) + + return x | y + + + + + +# threshold to characterize blocks as edge/non-edge blocks +THRESHOLD = 0.002 +# fitting parameter +BETA = 3.6 +# block size +BLOCK_HEIGHT, BLOCK_WIDTH = (64, 64) +# just noticeable widths based on the perceptual experiments +WIDTH_JNB = np.concatenate([5*np.ones(51), 3*np.ones(205)]) + + +def compute(image): + # type: (numpy.ndarray) -> float + """Compute the sharpness metric for the given data.""" + + # convert the image to double for further processing + image = image.astype(np.float64) + + # edge detection using canny and sobel canny edge detection is done to + # classify the blocks as edge or non-edge blocks and sobel edge + # detection is done for the purpose of edge width measurement. + canny_edges = canny(image) + sobel_edges = sobel(image) + + # edge width calculation + marziliano_widths = marziliano_method(sobel_edges, image) + + # sharpness metric calculation + return _calculate_sharpness_metric(image, canny_edges, marziliano_widths) + + +def marziliano_method(edges, image): + # type: (numpy.ndarray, numpy.ndarray) -> numpy.ndarray + """ + Calculate the widths of the given edges. + + :return: A matrix with the same dimensions as the given image with 0's at + non-edge locations and edge-widths at the edge locations. + """ + + # `edge_widths` consists of zero and non-zero values. A zero value + # indicates that there is no edge at that position and a non-zero value + # indicates that there is an edge at that position and the value itself + # gives the edge width. 
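
The width measurement that follows is easier to see on a single row: starting from an edge pixel, walk outward in both directions while the intensity profile keeps rising (for a rising edge), and report the sum of the two extents. A miniature 1-D rendition of that loop, with a hypothetical edge location:

import numpy as np

row = np.array([10, 10, 12, 40, 80, 120, 128, 128, 128], dtype=float)
edge_col = 4                    # hypothetical edge pixel from the detector

def extent(row, col, step):
    # Walk while the profile keeps rising in the left-to-right sense,
    # mirroring the inner/outer border comparison in marziliano_method.
    width, c = 0, col
    while True:
        inner, outer = c + step, c + 2 * step
        if outer < 0 or outer >= len(row):
            break
        if (row[outer] - row[inner]) * step <= 0:
            break
        width += 1
        c += step
    return width + 1

print(extent(row, edge_col, -1) + extent(row, edge_col, +1))   # edge width 5
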
+ edge_widths = np.zeros(image.shape) + + # find the gradient for the image + gradient_y, gradient_x = np.gradient(image) + + # dimensions of the image + img_height, img_width = image.shape + + # holds the angle information of the edges + edge_angles = np.zeros(image.shape) + + # calculate the angle of the edges + for row in range(img_height): + for col in range(img_width): + if gradient_x[row, col] != 0: + edge_angles[row, col] = atan2(gradient_y[row, col], gradient_x[row, col]) * (180 / pi) + elif gradient_x[row, col] == 0 and gradient_y[row, col] == 0: + edge_angles[row,col] = 0 + elif gradient_x[row, col] == 0 and gradient_y[row, col] == pi/2: + edge_angles[row, col] = 90 + + + if np.any(edge_angles): + + # quantize the angle + quantized_angles = 45 * np.round(edge_angles / 45) + + for row in range(1, img_height - 1): + for col in range(1, img_width - 1): + if edges[row, col] == 1: + + # gradient angle = 180 or -180 + if quantized_angles[row, col] == 180 or quantized_angles[row, col] == -180: + for margin in range(100 + 1): + inner_border = (col - 1) - margin + outer_border = (col - 2) - margin + + # outside image or intensity increasing from left to right + if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) <= 0: + break + + width_left = margin + 1 + + for margin in range(100 + 1): + inner_border = (col + 1) + margin + outer_border = (col + 2) + margin + + # outside image or intensity increasing from left to right + if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) >= 0: + break + + width_right = margin + 1 + + edge_widths[row, col] = width_left + width_right + + + # gradient angle = 0 + if quantized_angles[row, col] == 0: + for margin in range(100 + 1): + inner_border = (col - 1) - margin + outer_border = (col - 2) - margin + + # outside image or intensity decreasing from left to right + if outer_border < 0 or (image[row, outer_border] - image[row, inner_border]) >= 0: + break + + width_left = margin + 1 + + for margin in range(100 + 1): + inner_border = (col + 1) + margin + outer_border = (col + 2) + margin + + # outside image or intensity decreasing from left to right + if outer_border >= img_width or (image[row, outer_border] - image[row, inner_border]) <= 0: + break + + width_right = margin + 1 + + edge_widths[row, col] = width_right + width_left + + return edge_widths + + +def _calculate_sharpness_metric(image, edges, edge_widths): + # type: (numpy.array, numpy.array, numpy.array) -> numpy.float64 + + # get the size of image + img_height, img_width = image.shape + + total_num_edges = 0 + hist_pblur = np.zeros(101) + + # maximum block indices + num_blocks_vertically = int(img_height / BLOCK_HEIGHT) + num_blocks_horizontally = int(img_width / BLOCK_WIDTH) + + # loop over the blocks + for i in range(num_blocks_vertically): + for j in range(num_blocks_horizontally): + + # get the row and col indices for the block pixel positions + rows = slice(BLOCK_HEIGHT * i, BLOCK_HEIGHT * (i + 1)) + cols = slice(BLOCK_WIDTH * j, BLOCK_WIDTH * (j + 1)) + + if is_edge_block(edges[rows, cols], THRESHOLD): + block_widths = edge_widths[rows, cols] + # rotate block to simulate column-major boolean indexing + block_widths = np.rot90(np.flipud(block_widths), 3) + block_widths = block_widths[block_widths != 0] + + block_contrast = get_block_contrast(image[rows, cols]) + block_jnb = WIDTH_JNB[block_contrast] + + # calculate the probability of blur detection at the edges + # detected in the block + prob_blur_detection = 1 - 
np.exp(-abs(block_widths/block_jnb) ** BETA) + + # update the statistics using the block information + for probability in prob_blur_detection: + bucket = int(round(probability * 100)) + hist_pblur[bucket] += 1 + total_num_edges += 1 + + # normalize the pdf + if total_num_edges > 0: + hist_pblur = hist_pblur / total_num_edges + + # calculate the sharpness metric + return np.sum(hist_pblur[:64]) + + +def is_edge_block(block, threshold): + # type: (numpy.ndarray, float) -> bool + """Decide whether the given block is an edge block.""" + return np.count_nonzero(block) > (block.size * threshold) + + +def get_block_contrast(block): + # type: (numpy.ndarray) -> int + return int(np.max(block) - np.min(block)) + + +def estimate_sharpness(image): + height, width = image.shape[:2] + + if image.ndim == 3: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + return compute(image) diff --git a/imagelib/morph.py b/imagelib/morph.py index bd3bbd8..8aa5114 100644 --- a/imagelib/morph.py +++ b/imagelib/morph.py @@ -1,37 +1,37 @@ -import numpy as np -import cv2 -from scipy.spatial import Delaunay - - -def applyAffineTransform(src, srcTri, dstTri, size) : - warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) ) - return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 ) - -def morphTriangle(dst_img, src_img, st, dt) : - (h,w,c) = dst_img.shape - sr = np.array( cv2.boundingRect(np.float32(st)) ) - dr = np.array( cv2.boundingRect(np.float32(dt)) ) - sRect = st - sr[0:2] - dRect = dt - dr[0:2] - d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32) - cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0); - imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]] - size = (dr[2], dr[3]) - warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size) - - if c == 1: - warpImage1 = np.expand_dims( warpImage1, -1 ) - - dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask - -def morph_by_points (image, sp, dp): - if sp.shape != dp.shape: - raise ValueError ('morph_by_points() sp.shape != dp.shape') - (h,w,c) = image.shape - - result_image = np.zeros(image.shape, dtype = image.dtype) - - for tri in Delaunay(dp).simplices: - morphTriangle(result_image, image, sp[tri], dp[tri]) - +import numpy as np +import cv2 +from scipy.spatial import Delaunay + + +def applyAffineTransform(src, srcTri, dstTri, size) : + warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) ) + return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 ) + +def morphTriangle(dst_img, src_img, st, dt) : + (h,w,c) = dst_img.shape + sr = np.array( cv2.boundingRect(np.float32(st)) ) + dr = np.array( cv2.boundingRect(np.float32(dt)) ) + sRect = st - sr[0:2] + dRect = dt - dr[0:2] + d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32) + cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0); + imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]] + size = (dr[2], dr[3]) + warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size) + + if c == 1: + warpImage1 = np.expand_dims( warpImage1, -1 ) + + dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask + +def morph_by_points (image, sp, dp): + if sp.shape != dp.shape: + raise ValueError ('morph_by_points() sp.shape != dp.shape') + (h,w,c) = image.shape + + result_image = 
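
`morph_by_points` below performs a piecewise-affine warp: the destination points are Delaunay-triangulated and each triangle is affine-warped from its source counterpart, blended in with a filled triangle mask. A minimal usage sketch with a displaced interior control point:

import numpy as np
from imagelib import morph_by_points

img = np.random.rand(128, 128, 3).astype(np.float32)

# Four fixed corners plus one interior point; moving the interior point
# bends the image piecewise-affinely across the Delaunay triangles.
sp = np.float32([[0, 0], [127, 0], [0, 127], [127, 127], [64, 64]])
dp = sp.copy()
dp[4] = [80, 60]

warped = morph_by_points(img, sp, dp)
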
np.zeros(image.shape, dtype = image.dtype) + + for tri in Delaunay(dp).simplices: + morphTriangle(result_image, image, sp[tri], dp[tri]) + return result_image \ No newline at end of file diff --git a/imagelib/reduce_colors.py b/imagelib/reduce_colors.py index 4ff8823..961f00d 100644 --- a/imagelib/reduce_colors.py +++ b/imagelib/reduce_colors.py @@ -1,14 +1,14 @@ -import numpy as np -import cv2 -from PIL import Image - -#n_colors = [0..256] -def reduce_colors (img_bgr, n_colors): - img_rgb = (img_bgr[...,::-1] * 255.0).astype(np.uint8) - img_rgb_pil = Image.fromarray(img_rgb) - img_rgb_pil_p = img_rgb_pil.convert('P', palette=Image.ADAPTIVE, colors=n_colors) - - img_rgb_p = img_rgb_pil_p.convert('RGB') - img_bgr = cv2.cvtColor( np.array(img_rgb_p, dtype=np.float32) / 255.0, cv2.COLOR_RGB2BGR ) - - return img_bgr +import numpy as np +import cv2 +from PIL import Image + +#n_colors = [0..256] +def reduce_colors (img_bgr, n_colors): + img_rgb = (img_bgr[...,::-1] * 255.0).astype(np.uint8) + img_rgb_pil = Image.fromarray(img_rgb) + img_rgb_pil_p = img_rgb_pil.convert('P', palette=Image.ADAPTIVE, colors=n_colors) + + img_rgb_p = img_rgb_pil_p.convert('RGB') + img_bgr = cv2.cvtColor( np.array(img_rgb_p, dtype=np.float32) / 255.0, cv2.COLOR_RGB2BGR ) + + return img_bgr diff --git a/imagelib/text.py b/imagelib/text.py index 31639dc..2659db2 100644 --- a/imagelib/text.py +++ b/imagelib/text.py @@ -1,64 +1,64 @@ -import localization -import numpy as np -from PIL import Image, ImageDraw, ImageFont - -pil_fonts = {} -def _get_pil_font (font, size): - global pil_fonts - try: - font_str_id = '%s_%d' % (font, size) - if font_str_id not in pil_fonts.keys(): - pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic") - pil_font = pil_fonts[font_str_id] - return pil_font - except: - return ImageFont.load_default() - -def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None): - h,w,c = shape - try: - pil_font = _get_pil_font( localization.get_default_ttf_font_name() , h-2) - - canvas = Image.new('RGB', (w,h) , (0,0,0) ) - draw = ImageDraw.Draw(canvas) - offset = ( 0, 0) - draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(np.int)) ) - - result = np.asarray(canvas) / 255 - - if c > 3: - result = np.concatenate ( (result, np.ones ((h,w,c-3)) ), axis=-1 ) - elif c < 3: - result = result[...,0:c] - return result - except: - return np.zeros ( (h,w,c) ) - -def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None): - h,w,c = image.shape - - l,t,r,b = rect - l = np.clip (l, 0, w-1) - r = np.clip (r, 0, w-1) - t = np.clip (t, 0, h-1) - b = np.clip (b, 0, h-1) - - image[t:b, l:r] += get_text_image ( (b-t,r-l,c) , text, color, border, font ) - - -def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None): - text_lines_len = len(text_lines) - if text_lines_len == 0: - return - - l,t,r,b = rect - h = b-t - h_per_line = h // text_lines_len - - for i in range(0, text_lines_len): - draw_text (image, (l, i*h_per_line, r, (i+1)*h_per_line), text_lines[i], color, border, font) - -def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None): - image = np.zeros ( image.shape, dtype=np.float ) - draw_text_lines ( image, rect, text_lines, color, border, font) - return image +import localization +import numpy as np +from PIL import Image, ImageDraw, ImageFont + +pil_fonts = {} +def _get_pil_font (font, size): + global pil_fonts + try: + font_str_id = '%s_%d' % (font, size) + if font_str_id not in 
pil_fonts.keys(): + pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic") + pil_font = pil_fonts[font_str_id] + return pil_font + except: + return ImageFont.load_default() + +def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None): + h,w,c = shape + try: + pil_font = _get_pil_font( localization.get_default_ttf_font_name() , h-2) + + canvas = Image.new('RGB', (w,h) , (0,0,0) ) + draw = ImageDraw.Draw(canvas) + offset = ( 0, 0) + draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(np.int)) ) + + result = np.asarray(canvas) / 255 + + if c > 3: + result = np.concatenate ( (result, np.ones ((h,w,c-3)) ), axis=-1 ) + elif c < 3: + result = result[...,0:c] + return result + except: + return np.zeros ( (h,w,c) ) + +def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None): + h,w,c = image.shape + + l,t,r,b = rect + l = np.clip (l, 0, w-1) + r = np.clip (r, 0, w-1) + t = np.clip (t, 0, h-1) + b = np.clip (b, 0, h-1) + + image[t:b, l:r] += get_text_image ( (b-t,r-l,c) , text, color, border, font ) + + +def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None): + text_lines_len = len(text_lines) + if text_lines_len == 0: + return + + l,t,r,b = rect + h = b-t + h_per_line = h // text_lines_len + + for i in range(0, text_lines_len): + draw_text (image, (l, i*h_per_line, r, (i+1)*h_per_line), text_lines[i], color, border, font) + +def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None): + image = np.zeros ( image.shape, dtype=np.float ) + draw_text_lines ( image, rect, text_lines, color, border, font) + return image diff --git a/imagelib/warp.py b/imagelib/warp.py index 9d5754d..aa0f602 100644 --- a/imagelib/warp.py +++ b/imagelib/warp.py @@ -1,51 +1,51 @@ -import numpy as np -import cv2 -from utils import random_utils - -def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): - h,w,c = source.shape - if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024): - raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.') - - rotation = np.random.uniform( rotation_range[0], rotation_range[1] ) - scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1]) - tx = np.random.uniform( tx_range[0], tx_range[1] ) - ty = np.random.uniform( ty_range[0], ty_range[1] ) - - #random warp by grid - cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ] - cell_count = w // cell_size + 1 - - grid_points = np.linspace( 0, w, cell_count) - mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() - mapy = mapx.T - - mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) - mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) - - half_cell_size = cell_size // 2 - - mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) - mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) - - #random transform - random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale) - random_transform_mat[:, 2] += (tx*w, ty*w) - - params = dict() - params['mapx'] = mapx - params['mapy'] = mapy - params['rmat'] = random_transform_mat - params['w'] = w - 
params['flip'] = flip and np.random.randint(10) < 4 - - return params - -def warp_by_params (params, img, warp, transform, flip, is_border_replicate): - if warp: - img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC ) - if transform: - img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC ) - if flip and params['flip']: - img = img[:,::-1,...] +import numpy as np +import cv2 +from utils import random_utils + +def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): + h,w,c = source.shape + if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024): + raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.') + + rotation = np.random.uniform( rotation_range[0], rotation_range[1] ) + scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1]) + tx = np.random.uniform( tx_range[0], tx_range[1] ) + ty = np.random.uniform( ty_range[0], ty_range[1] ) + + #random warp by grid + cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ] + cell_count = w // cell_size + 1 + + grid_points = np.linspace( 0, w, cell_count) + mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() + mapy = mapx.T + + mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + + half_cell_size = cell_size // 2 + + mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) + mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) + + #random transform + random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale) + random_transform_mat[:, 2] += (tx*w, ty*w) + + params = dict() + params['mapx'] = mapx + params['mapy'] = mapy + params['rmat'] = random_transform_mat + params['w'] = w + params['flip'] = flip and np.random.randint(10) < 4 + + return params + +def warp_by_params (params, img, warp, transform, flip, is_border_replicate): + if warp: + img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC ) + if transform: + img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC ) + if flip and params['flip']: + img = img[:,::-1,...] 
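gen_warp_params above builds the random augmentation in two parts: a coarse cell grid whose interior nodes are jittered with gaussian noise and then upsampled to per-pixel mapx/mapy for cv2.remap, plus a rotation/scale/translation matrix for cv2.warpAffine; warp_by_params then applies whichever of warp/transform/flip is requested. A condensed sketch of the grid warp alone, with np.random.normal standing in for random_utils.random_normal (an assumption) and a simpler crop that keeps the maps exactly w by w:

import cv2
import numpy as np

def random_grid_warp(img, cell_size=32, strength=0.24):
    h, w = img.shape[:2]
    assert h == w, "square input assumed, as in gen_warp_params"
    cell_count = w // cell_size + 1
    grid = np.linspace(0, w, cell_count)
    mapx = np.broadcast_to(grid, (cell_count, cell_count)).copy()
    mapy = mapx.T.copy()
    # jitter interior nodes only, so the image borders stay pinned
    mapx[1:-1, 1:-1] += np.random.normal(size=(cell_count - 2,) * 2) * (cell_size * strength)
    mapy[1:-1, 1:-1] += np.random.normal(size=(cell_count - 2,) * 2) * (cell_size * strength)
    half = cell_size // 2
    # upsample the coarse grid to per-pixel maps and crop off the padded border
    mapx = cv2.resize(mapx, (w + cell_size,) * 2)[half:half + w, half:half + w].astype(np.float32)
    mapy = cv2.resize(mapy, (w + cell_size,) * 2)[half:half + w, half:half + w].astype(np.float32)
    return cv2.remap(img, mapx, mapy, cv2.INTER_CUBIC)

warped = random_grid_warp(np.random.rand(128, 128, 3).astype(np.float32))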
return img \ No newline at end of file diff --git a/interact/__init__.py b/interact/__init__.py index d6f770a..db40e4f 100644 --- a/interact/__init__.py +++ b/interact/__init__.py @@ -1 +1 @@ -from .interact import interact +from .interact import interact diff --git a/interact/interact.py b/interact/interact.py index 323c157..c9b22f1 100644 --- a/interact/interact.py +++ b/interact/interact.py @@ -1,404 +1,404 @@ -import multiprocessing -import os -import sys -import time -import types - -import cv2 -from tqdm import tqdm - -try: - import IPython #if success we are in colab - from IPython.display import display, clear_output - import PIL - import matplotlib.pyplot as plt - is_colab = True -except: - is_colab = False - -class InteractBase(object): - EVENT_LBUTTONDOWN = 1 - EVENT_LBUTTONUP = 2 - EVENT_MBUTTONDOWN = 3 - EVENT_MBUTTONUP = 4 - EVENT_RBUTTONDOWN = 5 - EVENT_RBUTTONUP = 6 - EVENT_MOUSEWHEEL = 10 - - def __init__(self): - self.named_windows = {} - self.capture_mouse_windows = {} - self.capture_keys_windows = {} - self.mouse_events = {} - self.key_events = {} - self.pg_bar = None - self.focus_wnd_name = None - - def is_support_windows(self): - return False - - def is_colab(self): - return False - - def on_destroy_all_windows(self): - raise NotImplemented - - def on_create_window (self, wnd_name): - raise NotImplemented - - def on_destroy_window (self, wnd_name): - raise NotImplemented - - def on_show_image (self, wnd_name, img): - raise NotImplemented - - def on_capture_mouse (self, wnd_name): - raise NotImplemented - - def on_capture_keys (self, wnd_name): - raise NotImplemented - - def on_process_messages(self, sleep_time=0): - raise NotImplemented - - def on_wait_any_key(self): - raise NotImplemented - - def log_info(self, msg, end='\n'): - print (msg, end=end) - - def log_err(self, msg, end='\n'): - print (msg, end=end) - - def named_window(self, wnd_name): - if wnd_name not in self.named_windows: - #we will show window only on first show_image - self.named_windows[wnd_name] = 0 - self.focus_wnd_name = wnd_name - else: print("named_window: ", wnd_name, " already created.") - - def destroy_all_windows(self): - if len( self.named_windows ) != 0: - self.on_destroy_all_windows() - self.named_windows = {} - self.capture_mouse_windows = {} - self.capture_keys_windows = {} - self.mouse_events = {} - self.key_events = {} - self.focus_wnd_name = None - - def destroy_window(self, wnd_name): - if wnd_name in self.named_windows: - self.on_destroy_window(wnd_name) - self.named_windows.pop(wnd_name) - - if wnd_name == self.focus_wnd_name: - self.focus_wnd_name = list(self.named_windows.keys())[-1] if len( self.named_windows ) != 0 else None - - if wnd_name in self.capture_mouse_windows: - self.capture_mouse_windows.pop(wnd_name) - - if wnd_name in self.capture_keys_windows: - self.capture_keys_windows.pop(wnd_name) - - if wnd_name in self.mouse_events: - self.mouse_events.pop(wnd_name) - - if wnd_name in self.key_events: - self.key_events.pop(wnd_name) - - def show_image(self, wnd_name, img): - if wnd_name in self.named_windows: - if self.named_windows[wnd_name] == 0: - self.named_windows[wnd_name] = 1 - self.on_create_window(wnd_name) - if wnd_name in self.capture_mouse_windows: - self.capture_mouse(wnd_name) - self.on_show_image(wnd_name,img) - else: print("show_image: named_window ", wnd_name, " not found.") - - def capture_mouse(self, wnd_name): - if wnd_name in self.named_windows: - self.capture_mouse_windows[wnd_name] = True - if self.named_windows[wnd_name] == 1: - 
self.on_capture_mouse(wnd_name) - else: print("capture_mouse: named_window ", wnd_name, " not found.") - - def capture_keys(self, wnd_name): - if wnd_name in self.named_windows: - if wnd_name not in self.capture_keys_windows: - self.capture_keys_windows[wnd_name] = True - self.on_capture_keys(wnd_name) - else: print("capture_keys: already set for window ", wnd_name) - else: print("capture_keys: named_window ", wnd_name, " not found.") - - def progress_bar(self, desc, total, leave=True): - if self.pg_bar is None: - self.pg_bar = tqdm( total=total, desc=desc, leave=leave, ascii=True ) - else: print("progress_bar: already set.") - - def progress_bar_inc(self, c): - if self.pg_bar is not None: - self.pg_bar.n += c - self.pg_bar.refresh() - else: print("progress_bar not set.") - - def progress_bar_close(self): - if self.pg_bar is not None: - self.pg_bar.close() - self.pg_bar = None - else: print("progress_bar not set.") - - def progress_bar_generator(self, data, desc, leave=True): - for x in tqdm( data, desc=desc, leave=leave, ascii=True ): - yield x - - def process_messages(self, sleep_time=0): - self.on_process_messages(sleep_time) - - def wait_any_key(self): - self.on_wait_any_key() - - def add_mouse_event(self, wnd_name, x, y, ev, flags): - if wnd_name not in self.mouse_events: - self.mouse_events[wnd_name] = [] - self.mouse_events[wnd_name] += [ (x, y, ev, flags) ] - - def add_key_event(self, wnd_name, ord_key, ctrl_pressed, alt_pressed, shift_pressed): - if wnd_name not in self.key_events: - self.key_events[wnd_name] = [] - self.key_events[wnd_name] += [ (ord_key, chr(ord_key), ctrl_pressed, alt_pressed, shift_pressed) ] - - def get_mouse_events(self, wnd_name): - ar = self.mouse_events.get(wnd_name, []) - self.mouse_events[wnd_name] = [] - return ar - - def get_key_events(self, wnd_name): - ar = self.key_events.get(wnd_name, []) - self.key_events[wnd_name] = [] - return ar - - def input_number(self, s, default_value, valid_list=None, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - i = float(inp) - if (valid_list is not None) and (i not in valid_list): - return default_value - return i - except: - print (default_value) - return default_value - - def input_int(self,s, default_value, valid_list=None, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - i = int(inp) - if (valid_list is not None) and (i not in valid_list): - return default_value - return i - except: - print (default_value) - return default_value - - def input_bool(self, s, default_value, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - return bool ( {"y":True,"n":False,"1":True,"0":False}.get(inp.lower(), default_value) ) - except: - print ( "y" if default_value else "n" ) - return default_value - - def input_str(self, s, default_value, valid_list=None, help_message=None): - while True: - try: - inp = input(s) - if len(inp) == 0: - raise ValueError("") - - if help_message is not None and inp == '?': - print (help_message) - continue - - if valid_list is not None: - if inp.lower() in valid_list: - return inp.lower() - if inp in valid_list: - return inp - return default_value - - return inp - - except: - print 
(default_value) - return default_value - - def input_process(self, stdin_fd, sq, str): - sys.stdin = os.fdopen(stdin_fd) - try: - inp = input (str) - sq.put (True) - except: - sq.put (False) - - def input_in_time (self, str, max_time_sec): - sq = multiprocessing.Queue() - p = multiprocessing.Process(target=self.input_process, args=( sys.stdin.fileno(), sq, str)) - p.start() - t = time.time() - inp = False - while True: - if not sq.empty(): - inp = sq.get() - break - if time.time() - t > max_time_sec: - break - p.terminate() - sys.stdin = os.fdopen( sys.stdin.fileno() ) - return inp - - - -class InteractDesktop(InteractBase): - - def is_support_windows(self): - return True - - def on_destroy_all_windows(self): - cv2.destroyAllWindows() - - def on_create_window (self, wnd_name): - cv2.namedWindow(wnd_name) - - def on_destroy_window (self, wnd_name): - cv2.destroyWindow(wnd_name) - - def on_show_image (self, wnd_name, img): - cv2.imshow (wnd_name, img) - - def on_capture_mouse (self, wnd_name): - self.last_xy = (0,0) - - def onMouse(event, x, y, flags, param): - (inst, wnd_name) = param - if event == cv2.EVENT_LBUTTONDOWN: ev = InteractBase.EVENT_LBUTTONDOWN - elif event == cv2.EVENT_LBUTTONUP: ev = InteractBase.EVENT_LBUTTONUP - elif event == cv2.EVENT_RBUTTONDOWN: ev = InteractBase.EVENT_RBUTTONDOWN - elif event == cv2.EVENT_RBUTTONUP: ev = InteractBase.EVENT_RBUTTONUP - elif event == cv2.EVENT_MBUTTONDOWN: ev = InteractBase.EVENT_MBUTTONDOWN - elif event == cv2.EVENT_MBUTTONUP: ev = InteractBase.EVENT_MBUTTONUP - elif event == cv2.EVENT_MOUSEWHEEL: - ev = InteractBase.EVENT_MOUSEWHEEL - x,y = self.last_xy #fix opencv bug when window size more than screen size - else: ev = 0 - - self.last_xy = (x,y) - inst.add_mouse_event (wnd_name, x, y, ev, flags) - cv2.setMouseCallback(wnd_name, onMouse, (self,wnd_name) ) - - def on_capture_keys (self, wnd_name): - pass - - def on_process_messages(self, sleep_time=0): - - has_windows = False - has_capture_keys = False - - if len(self.named_windows) != 0: - has_windows = True - - if len(self.capture_keys_windows) != 0: - has_capture_keys = True - - if has_windows or has_capture_keys: - wait_key_time = max(1, int(sleep_time*1000) ) - ord_key = cv2.waitKey(wait_key_time) - shift_pressed = False - if ord_key != -1: - if chr(ord_key) >= 'A' and chr(ord_key) <= 'Z': - shift_pressed = True - ord_key += 32 - else: - if sleep_time != 0: - time.sleep(sleep_time) - - if has_capture_keys and ord_key != -1: - self.add_key_event ( self.focus_wnd_name, ord_key, False, False, shift_pressed) - - def on_wait_any_key(self): - cv2.waitKey(0) - -class InteractColab(InteractBase): - - def is_support_windows(self): - return False - - def is_colab(self): - return True - - def on_destroy_all_windows(self): - pass - #clear_output() - - def on_create_window (self, wnd_name): - pass - #clear_output() - - def on_destroy_window (self, wnd_name): - pass - - def on_show_image (self, wnd_name, img): - pass - # # cv2 stores colors as BGR; convert to RGB - # if img.ndim == 3: - # if img.shape[2] == 4: - # img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) - # else: - # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - # img = PIL.Image.fromarray(img) - # plt.imshow(img) - # plt.show() - - def on_capture_mouse (self, wnd_name): - pass - #print("on_capture_mouse(): Colab does not support") - - def on_capture_keys (self, wnd_name): - pass - #print("on_capture_keys(): Colab does not support") - - def on_process_messages(self, sleep_time=0): - time.sleep(sleep_time) - - def on_wait_any_key(self): - pass 
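InteractBase funnels all UI through overridable on_* hooks, so the same calling code runs against OpenCV windows on desktop (InteractDesktop) and degrades to no-ops on Colab (InteractColab); windows are created lazily on the first show_image, and mouse/key events are buffered per window until drained. A hypothetical consumer loop over just the public API shown here:

import numpy as np
from interact import interact as io

io.named_window("preview")
io.capture_keys("preview")
frame = np.zeros((256, 256, 3), dtype=np.float32)
done = False
while not done:
    io.show_image("preview", frame)   # creates the window on first call
    io.process_messages(0.1)          # pumps cv2.waitKey, or just sleeps on Colab
    for ord_key, chr_key, ctrl, alt, shift in io.get_key_events("preview"):
        if chr_key == 'q':            # the event buffer drains on each get_key_events call
            done = True
io.destroy_all_windows()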
- #print("on_wait_any_key(): Colab does not support") - -if is_colab: - interact = InteractColab() -else: - interact = InteractDesktop() +import multiprocessing +import os +import sys +import time +import types + +import cv2 +from tqdm import tqdm + +try: + import IPython #if success we are in colab + from IPython.display import display, clear_output + import PIL + import matplotlib.pyplot as plt + is_colab = True +except: + is_colab = False + +class InteractBase(object): + EVENT_LBUTTONDOWN = 1 + EVENT_LBUTTONUP = 2 + EVENT_MBUTTONDOWN = 3 + EVENT_MBUTTONUP = 4 + EVENT_RBUTTONDOWN = 5 + EVENT_RBUTTONUP = 6 + EVENT_MOUSEWHEEL = 10 + + def __init__(self): + self.named_windows = {} + self.capture_mouse_windows = {} + self.capture_keys_windows = {} + self.mouse_events = {} + self.key_events = {} + self.pg_bar = None + self.focus_wnd_name = None + + def is_support_windows(self): + return False + + def is_colab(self): + return False + + def on_destroy_all_windows(self): + raise NotImplemented + + def on_create_window (self, wnd_name): + raise NotImplemented + + def on_destroy_window (self, wnd_name): + raise NotImplemented + + def on_show_image (self, wnd_name, img): + raise NotImplemented + + def on_capture_mouse (self, wnd_name): + raise NotImplemented + + def on_capture_keys (self, wnd_name): + raise NotImplemented + + def on_process_messages(self, sleep_time=0): + raise NotImplemented + + def on_wait_any_key(self): + raise NotImplemented + + def log_info(self, msg, end='\n'): + print (msg, end=end) + + def log_err(self, msg, end='\n'): + print (msg, end=end) + + def named_window(self, wnd_name): + if wnd_name not in self.named_windows: + #we will show window only on first show_image + self.named_windows[wnd_name] = 0 + self.focus_wnd_name = wnd_name + else: print("named_window: ", wnd_name, " already created.") + + def destroy_all_windows(self): + if len( self.named_windows ) != 0: + self.on_destroy_all_windows() + self.named_windows = {} + self.capture_mouse_windows = {} + self.capture_keys_windows = {} + self.mouse_events = {} + self.key_events = {} + self.focus_wnd_name = None + + def destroy_window(self, wnd_name): + if wnd_name in self.named_windows: + self.on_destroy_window(wnd_name) + self.named_windows.pop(wnd_name) + + if wnd_name == self.focus_wnd_name: + self.focus_wnd_name = list(self.named_windows.keys())[-1] if len( self.named_windows ) != 0 else None + + if wnd_name in self.capture_mouse_windows: + self.capture_mouse_windows.pop(wnd_name) + + if wnd_name in self.capture_keys_windows: + self.capture_keys_windows.pop(wnd_name) + + if wnd_name in self.mouse_events: + self.mouse_events.pop(wnd_name) + + if wnd_name in self.key_events: + self.key_events.pop(wnd_name) + + def show_image(self, wnd_name, img): + if wnd_name in self.named_windows: + if self.named_windows[wnd_name] == 0: + self.named_windows[wnd_name] = 1 + self.on_create_window(wnd_name) + if wnd_name in self.capture_mouse_windows: + self.capture_mouse(wnd_name) + self.on_show_image(wnd_name,img) + else: print("show_image: named_window ", wnd_name, " not found.") + + def capture_mouse(self, wnd_name): + if wnd_name in self.named_windows: + self.capture_mouse_windows[wnd_name] = True + if self.named_windows[wnd_name] == 1: + self.on_capture_mouse(wnd_name) + else: print("capture_mouse: named_window ", wnd_name, " not found.") + + def capture_keys(self, wnd_name): + if wnd_name in self.named_windows: + if wnd_name not in self.capture_keys_windows: + self.capture_keys_windows[wnd_name] = True + 
self.on_capture_keys(wnd_name) + else: print("capture_keys: already set for window ", wnd_name) + else: print("capture_keys: named_window ", wnd_name, " not found.") + + def progress_bar(self, desc, total, leave=True): + if self.pg_bar is None: + self.pg_bar = tqdm( total=total, desc=desc, leave=leave, ascii=True ) + else: print("progress_bar: already set.") + + def progress_bar_inc(self, c): + if self.pg_bar is not None: + self.pg_bar.n += c + self.pg_bar.refresh() + else: print("progress_bar not set.") + + def progress_bar_close(self): + if self.pg_bar is not None: + self.pg_bar.close() + self.pg_bar = None + else: print("progress_bar not set.") + + def progress_bar_generator(self, data, desc, leave=True): + for x in tqdm( data, desc=desc, leave=leave, ascii=True ): + yield x + + def process_messages(self, sleep_time=0): + self.on_process_messages(sleep_time) + + def wait_any_key(self): + self.on_wait_any_key() + + def add_mouse_event(self, wnd_name, x, y, ev, flags): + if wnd_name not in self.mouse_events: + self.mouse_events[wnd_name] = [] + self.mouse_events[wnd_name] += [ (x, y, ev, flags) ] + + def add_key_event(self, wnd_name, ord_key, ctrl_pressed, alt_pressed, shift_pressed): + if wnd_name not in self.key_events: + self.key_events[wnd_name] = [] + self.key_events[wnd_name] += [ (ord_key, chr(ord_key), ctrl_pressed, alt_pressed, shift_pressed) ] + + def get_mouse_events(self, wnd_name): + ar = self.mouse_events.get(wnd_name, []) + self.mouse_events[wnd_name] = [] + return ar + + def get_key_events(self, wnd_name): + ar = self.key_events.get(wnd_name, []) + self.key_events[wnd_name] = [] + return ar + + def input_number(self, s, default_value, valid_list=None, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + i = float(inp) + if (valid_list is not None) and (i not in valid_list): + return default_value + return i + except: + print (default_value) + return default_value + + def input_int(self,s, default_value, valid_list=None, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + i = int(inp) + if (valid_list is not None) and (i not in valid_list): + return default_value + return i + except: + print (default_value) + return default_value + + def input_bool(self, s, default_value, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + return bool ( {"y":True,"n":False,"1":True,"0":False}.get(inp.lower(), default_value) ) + except: + print ( "y" if default_value else "n" ) + return default_value + + def input_str(self, s, default_value, valid_list=None, help_message=None): + while True: + try: + inp = input(s) + if len(inp) == 0: + raise ValueError("") + + if help_message is not None and inp == '?': + print (help_message) + continue + + if valid_list is not None: + if inp.lower() in valid_list: + return inp.lower() + if inp in valid_list: + return inp + return default_value + + return inp + + except: + print (default_value) + return default_value + + def input_process(self, stdin_fd, sq, str): + sys.stdin = os.fdopen(stdin_fd) + try: + inp = input (str) + sq.put (True) + except: + sq.put (False) + + def input_in_time (self, str, max_time_sec): + sq = multiprocessing.Queue() + p = 
multiprocessing.Process(target=self.input_process, args=( sys.stdin.fileno(), sq, str)) + p.start() + t = time.time() + inp = False + while True: + if not sq.empty(): + inp = sq.get() + break + if time.time() - t > max_time_sec: + break + p.terminate() + sys.stdin = os.fdopen( sys.stdin.fileno() ) + return inp + + + +class InteractDesktop(InteractBase): + + def is_support_windows(self): + return True + + def on_destroy_all_windows(self): + cv2.destroyAllWindows() + + def on_create_window (self, wnd_name): + cv2.namedWindow(wnd_name) + + def on_destroy_window (self, wnd_name): + cv2.destroyWindow(wnd_name) + + def on_show_image (self, wnd_name, img): + cv2.imshow (wnd_name, img) + + def on_capture_mouse (self, wnd_name): + self.last_xy = (0,0) + + def onMouse(event, x, y, flags, param): + (inst, wnd_name) = param + if event == cv2.EVENT_LBUTTONDOWN: ev = InteractBase.EVENT_LBUTTONDOWN + elif event == cv2.EVENT_LBUTTONUP: ev = InteractBase.EVENT_LBUTTONUP + elif event == cv2.EVENT_RBUTTONDOWN: ev = InteractBase.EVENT_RBUTTONDOWN + elif event == cv2.EVENT_RBUTTONUP: ev = InteractBase.EVENT_RBUTTONUP + elif event == cv2.EVENT_MBUTTONDOWN: ev = InteractBase.EVENT_MBUTTONDOWN + elif event == cv2.EVENT_MBUTTONUP: ev = InteractBase.EVENT_MBUTTONUP + elif event == cv2.EVENT_MOUSEWHEEL: + ev = InteractBase.EVENT_MOUSEWHEEL + x,y = self.last_xy #fix opencv bug when window size more than screen size + else: ev = 0 + + self.last_xy = (x,y) + inst.add_mouse_event (wnd_name, x, y, ev, flags) + cv2.setMouseCallback(wnd_name, onMouse, (self,wnd_name) ) + + def on_capture_keys (self, wnd_name): + pass + + def on_process_messages(self, sleep_time=0): + + has_windows = False + has_capture_keys = False + + if len(self.named_windows) != 0: + has_windows = True + + if len(self.capture_keys_windows) != 0: + has_capture_keys = True + + if has_windows or has_capture_keys: + wait_key_time = max(1, int(sleep_time*1000) ) + ord_key = cv2.waitKey(wait_key_time) + shift_pressed = False + if ord_key != -1: + if chr(ord_key) >= 'A' and chr(ord_key) <= 'Z': + shift_pressed = True + ord_key += 32 + else: + if sleep_time != 0: + time.sleep(sleep_time) + + if has_capture_keys and ord_key != -1: + self.add_key_event ( self.focus_wnd_name, ord_key, False, False, shift_pressed) + + def on_wait_any_key(self): + cv2.waitKey(0) + +class InteractColab(InteractBase): + + def is_support_windows(self): + return False + + def is_colab(self): + return True + + def on_destroy_all_windows(self): + pass + #clear_output() + + def on_create_window (self, wnd_name): + pass + #clear_output() + + def on_destroy_window (self, wnd_name): + pass + + def on_show_image (self, wnd_name, img): + pass + # # cv2 stores colors as BGR; convert to RGB + # if img.ndim == 3: + # if img.shape[2] == 4: + # img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + # else: + # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + # img = PIL.Image.fromarray(img) + # plt.imshow(img) + # plt.show() + + def on_capture_mouse (self, wnd_name): + pass + #print("on_capture_mouse(): Colab does not support") + + def on_capture_keys (self, wnd_name): + pass + #print("on_capture_keys(): Colab does not support") + + def on_process_messages(self, sleep_time=0): + time.sleep(sleep_time) + + def on_wait_any_key(self): + pass + #print("on_wait_any_key(): Colab does not support") + +if is_colab: + interact = InteractColab() +else: + interact = InteractDesktop() diff --git a/joblib/SubprocessFunctionCaller.py b/joblib/SubprocessFunctionCaller.py index f7997b4..4ea3101 100644 --- 
a/joblib/SubprocessFunctionCaller.py +++ b/joblib/SubprocessFunctionCaller.py @@ -1,42 +1,42 @@ -import time -import multiprocessing - -class SubprocessFunctionCaller(object): - class CliFunction(object): - def __init__(self, s2c, c2s, lock): - self.s2c = s2c - self.c2s = c2s - self.lock = lock - - def __call__(self, *args, **kwargs): - self.lock.acquire() - self.c2s.put ( {'args':args, 'kwargs':kwargs} ) - while True: - if not self.s2c.empty(): - obj = self.s2c.get() - self.lock.release() - return obj - time.sleep(0.005) - - class HostProcessor(object): - def __init__(self, s2c, c2s, func): - self.s2c = s2c - self.c2s = c2s - self.func = func - - def process_messages(self): - while not self.c2s.empty(): - obj = self.c2s.get() - result = self.func ( *obj['args'], **obj['kwargs'] ) - self.s2c.put (result) - - @staticmethod - def make_pair( func ): - s2c = multiprocessing.Queue() - c2s = multiprocessing.Queue() - lock = multiprocessing.Lock() - - host_processor = SubprocessFunctionCaller.HostProcessor (s2c, c2s, func) - cli_func = SubprocessFunctionCaller.CliFunction (s2c, c2s, lock) - - return host_processor, cli_func +import time +import multiprocessing + +class SubprocessFunctionCaller(object): + class CliFunction(object): + def __init__(self, s2c, c2s, lock): + self.s2c = s2c + self.c2s = c2s + self.lock = lock + + def __call__(self, *args, **kwargs): + self.lock.acquire() + self.c2s.put ( {'args':args, 'kwargs':kwargs} ) + while True: + if not self.s2c.empty(): + obj = self.s2c.get() + self.lock.release() + return obj + time.sleep(0.005) + + class HostProcessor(object): + def __init__(self, s2c, c2s, func): + self.s2c = s2c + self.c2s = c2s + self.func = func + + def process_messages(self): + while not self.c2s.empty(): + obj = self.c2s.get() + result = self.func ( *obj['args'], **obj['kwargs'] ) + self.s2c.put (result) + + @staticmethod + def make_pair( func ): + s2c = multiprocessing.Queue() + c2s = multiprocessing.Queue() + lock = multiprocessing.Lock() + + host_processor = SubprocessFunctionCaller.HostProcessor (s2c, c2s, func) + cli_func = SubprocessFunctionCaller.CliFunction (s2c, c2s, lock) + + return host_processor, cli_func diff --git a/joblib/SubprocessorBase.py b/joblib/SubprocessorBase.py index 91540fd..dd48424 100644 --- a/joblib/SubprocessorBase.py +++ b/joblib/SubprocessorBase.py @@ -1,288 +1,288 @@ -import traceback -import multiprocessing -import time -import sys -from interact import interact as io - - -class Subprocessor(object): - - class SilenceException(Exception): - pass - - class Cli(object): - def __init__ ( self, client_dict ): - self.s2c = multiprocessing.Queue() - self.c2s = multiprocessing.Queue() - self.p = multiprocessing.Process(target=self._subprocess_run, args=(client_dict,) ) - self.p.daemon = True - self.p.start() - - self.state = None - self.sent_time = None - self.sent_data = None - self.name = None - self.host_dict = None - - def kill(self): - self.p.terminate() - self.p.join() - - #overridable optional - def on_initialize(self, client_dict): - #initialize your subprocess here using client_dict - pass - - #overridable optional - def on_finalize(self): - #finalize your subprocess here - pass - - #overridable - def process_data(self, data): - #process 'data' given from host and return result - raise NotImplementedError - - #overridable optional - def get_data_name (self, data): - #return string identificator of your 'data' - return "undefined" - - def log_info(self, msg): self.c2s.put ( {'op': 'log_info', 'msg':msg } ) - def log_err(self, msg): 
self.c2s.put ( {'op': 'log_err' , 'msg':msg } ) - def progress_bar_inc(self, c): self.c2s.put ( {'op': 'progress_bar_inc' , 'c':c } ) - - def _subprocess_run(self, client_dict): - data = None - s2c, c2s = self.s2c, self.c2s - try: - self.on_initialize(client_dict) - - c2s.put ( {'op': 'init_ok'} ) - - while True: - msg = s2c.get() - op = msg.get('op','') - if op == 'data': - data = msg['data'] - result = self.process_data (data) - c2s.put ( {'op': 'success', 'data' : data, 'result' : result} ) - data = None - elif op == 'close': - break - - time.sleep(0.001) - - self.on_finalize() - c2s.put ( {'op': 'finalized'} ) - return - except Subprocessor.SilenceException as e: - pass - except Exception as e: - if data is not None: - print ('Exception while process data [%s]: %s' % (self.get_data_name(data), traceback.format_exc()) ) - else: - print ('Exception: %s' % (traceback.format_exc()) ) - - c2s.put ( {'op': 'error', 'data' : data} ) - - #overridable - def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0): - if not issubclass(SubprocessorCli_class, Subprocessor.Cli): - raise ValueError("SubprocessorCli_class must be subclass of Subprocessor.Cli") - - self.name = name - self.SubprocessorCli_class = SubprocessorCli_class - self.no_response_time_sec = no_response_time_sec - - #overridable - def process_info_generator(self): - #yield per process (name, host_dict, client_dict) - raise NotImplementedError - - #overridable optional - def on_clients_initialized(self): - #logic when all subprocesses initialized and ready - pass - - #overridable optional - def on_clients_finalized(self): - #logic when all subprocess finalized - pass - - #overridable - def get_data(self, host_dict): - #return data for processing here - raise NotImplementedError - - #overridable - def on_data_return (self, host_dict, data): - #you have to place returned 'data' back to your queue - raise NotImplementedError - - #overridable - def on_result (self, host_dict, data, result): - #your logic what to do with 'result' of 'data' - raise NotImplementedError - - #overridable - def get_result(self): - #return result that will be returned in func run() - raise NotImplementedError - - #overridable - def on_tick(self): - #tick in main loop - pass - - #overridable - def on_check_run(self): - return True - - def run(self): - if not self.on_check_run(): - return self.get_result() - - self.clis = [] - - #getting info about name of subprocesses, host and client dicts, and spawning them - for name, host_dict, client_dict in self.process_info_generator(): - try: - cli = self.SubprocessorCli_class(client_dict) - cli.state = 1 - cli.sent_time = time.time() - cli.sent_data = None - cli.name = name - cli.host_dict = host_dict - - self.clis.append (cli) - - while True: - while not cli.c2s.empty(): - obj = cli.c2s.get() - op = obj.get('op','') - if op == 'init_ok': - cli.state = 0 - elif op == 'log_info': - io.log_info(obj['msg']) - elif op == 'log_err': - io.log_err(obj['msg']) - elif op == 'error': - cli.kill() - self.clis.remove(cli) - break - if cli.state == 0: - break - io.process_messages(0.005) - except: - raise Exception ("Unable to start subprocess %s" % (name)) - - if len(self.clis) == 0: - raise Exception ("Unable to start Subprocessor '%s' " % (self.name)) - - #waiting subprocesses their success(or not) initialization - while True: - for cli in self.clis[:]: - while not cli.c2s.empty(): - obj = cli.c2s.get() - op = obj.get('op','') - if op == 'init_ok': - cli.state = 0 - elif op == 'log_info': - io.log_info(obj['msg']) 
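The host/worker protocol here is a pair of queues per client exchanging small op dicts: a worker reports 'init_ok', then answers each 'data' message with 'success' (or 'error', which hands the in-flight chunk back through on_data_return and kills that worker), until 'close'/'finalized' shut it down. A minimal concrete subclass under that API, with every name in the sketch illustrative:

from joblib import Subprocessor

class DoubleSubprocessor(Subprocessor):
    # toy example: double a list of numbers across two worker processes
    class Cli(Subprocessor.Cli):
        def process_data(self, data):
            return data * 2
        def get_data_name(self, data):
            return str(data)

    def __init__(self, items):
        self.items = list(items)
        self.results = []
        super().__init__('Double', DoubleSubprocessor.Cli, no_response_time_sec=60)

    def process_info_generator(self):
        for i in range(2):
            yield 'worker_%d' % i, {}, {}   # (name, host_dict, client_dict)

    def get_data(self, host_dict):
        return self.items.pop() if self.items else None   # None -> nothing left to send

    def on_data_return(self, host_dict, data):
        self.items.append(data)             # requeue a chunk from a dead worker

    def on_result(self, host_dict, data, result):
        self.results.append(result)

    def get_result(self):
        return self.results

# DoubleSubprocessor(range(10)).run() -> the doubled values, in completion order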
- elif op == 'log_err': - io.log_err(obj['msg']) - elif op == 'error': - cli.kill() - self.clis.remove(cli) - break - if all ([cli.state == 0 for cli in self.clis]): - break - io.process_messages(0.005) - - if len(self.clis) == 0: - raise Exception ( "Unable to start subprocesses." ) - - #ok some processes survived, initialize host logic - - self.on_clients_initialized() - - #main loop of data processing - while True: - for cli in self.clis[:]: - while not cli.c2s.empty(): - obj = cli.c2s.get() - op = obj.get('op','') - if op == 'success': - #success processed data, return data and result to on_result - self.on_result (cli.host_dict, obj['data'], obj['result']) - self.sent_data = None - cli.state = 0 - elif op == 'error': - #some error occured while process data, returning chunk to on_data_return - if 'data' in obj.keys(): - self.on_data_return (cli.host_dict, obj['data'] ) - #and killing process - cli.kill() - self.clis.remove(cli) - elif op == 'log_info': - io.log_info(obj['msg']) - elif op == 'log_err': - io.log_err(obj['msg']) - elif op == 'progress_bar_inc': - io.progress_bar_inc(obj['c']) - - for cli in self.clis[:]: - if cli.state == 0: - #free state of subprocess, get some data from get_data - data = self.get_data(cli.host_dict) - if data is not None: - #and send it to subprocess - cli.s2c.put ( {'op': 'data', 'data' : data} ) - cli.sent_time = time.time() - cli.sent_data = data - cli.state = 1 - - elif cli.state == 1: - if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: - #subprocess busy too long - print ( '%s doesnt response, terminating it.' % (cli.name) ) - self.on_data_return (cli.host_dict, cli.sent_data ) - cli.kill() - self.clis.remove(cli) - - if all ([cli.state == 0 for cli in self.clis]): - #all subprocesses free and no more data available to process, ending loop - break - io.process_messages(0.005) - self.on_tick() - - #gracefully terminating subprocesses - for cli in self.clis[:]: - cli.s2c.put ( {'op': 'close'} ) - cli.sent_time = time.time() - - while True: - for cli in self.clis[:]: - terminate_it = False - while not cli.c2s.empty(): - obj = cli.c2s.get() - obj_op = obj['op'] - if obj_op == 'finalized': - terminate_it = True - break - - if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: - terminate_it = True - - if terminate_it: - cli.state = 2 - cli.kill() - - if all ([cli.state == 2 for cli in self.clis]): - break - - #finalizing host logic and return result - self.on_clients_finalized() - - return self.get_result() +import traceback +import multiprocessing +import time +import sys +from interact import interact as io + + +class Subprocessor(object): + + class SilenceException(Exception): + pass + + class Cli(object): + def __init__ ( self, client_dict ): + self.s2c = multiprocessing.Queue() + self.c2s = multiprocessing.Queue() + self.p = multiprocessing.Process(target=self._subprocess_run, args=(client_dict,) ) + self.p.daemon = True + self.p.start() + + self.state = None + self.sent_time = None + self.sent_data = None + self.name = None + self.host_dict = None + + def kill(self): + self.p.terminate() + self.p.join() + + #overridable optional + def on_initialize(self, client_dict): + #initialize your subprocess here using client_dict + pass + + #overridable optional + def on_finalize(self): + #finalize your subprocess here + pass + + #overridable + def process_data(self, data): + #process 'data' given from host and return result + raise NotImplementedError + + 
#overridable optional + def get_data_name (self, data): + #return string identificator of your 'data' + return "undefined" + + def log_info(self, msg): self.c2s.put ( {'op': 'log_info', 'msg':msg } ) + def log_err(self, msg): self.c2s.put ( {'op': 'log_err' , 'msg':msg } ) + def progress_bar_inc(self, c): self.c2s.put ( {'op': 'progress_bar_inc' , 'c':c } ) + + def _subprocess_run(self, client_dict): + data = None + s2c, c2s = self.s2c, self.c2s + try: + self.on_initialize(client_dict) + + c2s.put ( {'op': 'init_ok'} ) + + while True: + msg = s2c.get() + op = msg.get('op','') + if op == 'data': + data = msg['data'] + result = self.process_data (data) + c2s.put ( {'op': 'success', 'data' : data, 'result' : result} ) + data = None + elif op == 'close': + break + + time.sleep(0.001) + + self.on_finalize() + c2s.put ( {'op': 'finalized'} ) + return + except Subprocessor.SilenceException as e: + pass + except Exception as e: + if data is not None: + print ('Exception while process data [%s]: %s' % (self.get_data_name(data), traceback.format_exc()) ) + else: + print ('Exception: %s' % (traceback.format_exc()) ) + + c2s.put ( {'op': 'error', 'data' : data} ) + + #overridable + def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0): + if not issubclass(SubprocessorCli_class, Subprocessor.Cli): + raise ValueError("SubprocessorCli_class must be subclass of Subprocessor.Cli") + + self.name = name + self.SubprocessorCli_class = SubprocessorCli_class + self.no_response_time_sec = no_response_time_sec + + #overridable + def process_info_generator(self): + #yield per process (name, host_dict, client_dict) + raise NotImplementedError + + #overridable optional + def on_clients_initialized(self): + #logic when all subprocesses initialized and ready + pass + + #overridable optional + def on_clients_finalized(self): + #logic when all subprocess finalized + pass + + #overridable + def get_data(self, host_dict): + #return data for processing here + raise NotImplementedError + + #overridable + def on_data_return (self, host_dict, data): + #you have to place returned 'data' back to your queue + raise NotImplementedError + + #overridable + def on_result (self, host_dict, data, result): + #your logic what to do with 'result' of 'data' + raise NotImplementedError + + #overridable + def get_result(self): + #return result that will be returned in func run() + raise NotImplementedError + + #overridable + def on_tick(self): + #tick in main loop + pass + + #overridable + def on_check_run(self): + return True + + def run(self): + if not self.on_check_run(): + return self.get_result() + + self.clis = [] + + #getting info about name of subprocesses, host and client dicts, and spawning them + for name, host_dict, client_dict in self.process_info_generator(): + try: + cli = self.SubprocessorCli_class(client_dict) + cli.state = 1 + cli.sent_time = time.time() + cli.sent_data = None + cli.name = name + cli.host_dict = host_dict + + self.clis.append (cli) + + while True: + while not cli.c2s.empty(): + obj = cli.c2s.get() + op = obj.get('op','') + if op == 'init_ok': + cli.state = 0 + elif op == 'log_info': + io.log_info(obj['msg']) + elif op == 'log_err': + io.log_err(obj['msg']) + elif op == 'error': + cli.kill() + self.clis.remove(cli) + break + if cli.state == 0: + break + io.process_messages(0.005) + except: + raise Exception ("Unable to start subprocess %s" % (name)) + + if len(self.clis) == 0: + raise Exception ("Unable to start Subprocessor '%s' " % (self.name)) + + #waiting subprocesses their 
success(or not) initialization + while True: + for cli in self.clis[:]: + while not cli.c2s.empty(): + obj = cli.c2s.get() + op = obj.get('op','') + if op == 'init_ok': + cli.state = 0 + elif op == 'log_info': + io.log_info(obj['msg']) + elif op == 'log_err': + io.log_err(obj['msg']) + elif op == 'error': + cli.kill() + self.clis.remove(cli) + break + if all ([cli.state == 0 for cli in self.clis]): + break + io.process_messages(0.005) + + if len(self.clis) == 0: + raise Exception ( "Unable to start subprocesses." ) + + #ok some processes survived, initialize host logic + + self.on_clients_initialized() + + #main loop of data processing + while True: + for cli in self.clis[:]: + while not cli.c2s.empty(): + obj = cli.c2s.get() + op = obj.get('op','') + if op == 'success': + #success processed data, return data and result to on_result + self.on_result (cli.host_dict, obj['data'], obj['result']) + self.sent_data = None + cli.state = 0 + elif op == 'error': + #some error occured while process data, returning chunk to on_data_return + if 'data' in obj.keys(): + self.on_data_return (cli.host_dict, obj['data'] ) + #and killing process + cli.kill() + self.clis.remove(cli) + elif op == 'log_info': + io.log_info(obj['msg']) + elif op == 'log_err': + io.log_err(obj['msg']) + elif op == 'progress_bar_inc': + io.progress_bar_inc(obj['c']) + + for cli in self.clis[:]: + if cli.state == 0: + #free state of subprocess, get some data from get_data + data = self.get_data(cli.host_dict) + if data is not None: + #and send it to subprocess + cli.s2c.put ( {'op': 'data', 'data' : data} ) + cli.sent_time = time.time() + cli.sent_data = data + cli.state = 1 + + elif cli.state == 1: + if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: + #subprocess busy too long + print ( '%s doesnt response, terminating it.' 
% (cli.name) ) + self.on_data_return (cli.host_dict, cli.sent_data ) + cli.kill() + self.clis.remove(cli) + + if all ([cli.state == 0 for cli in self.clis]): + #all subprocesses free and no more data available to process, ending loop + break + io.process_messages(0.005) + self.on_tick() + + #gracefully terminating subprocesses + for cli in self.clis[:]: + cli.s2c.put ( {'op': 'close'} ) + cli.sent_time = time.time() + + while True: + for cli in self.clis[:]: + terminate_it = False + while not cli.c2s.empty(): + obj = cli.c2s.get() + obj_op = obj['op'] + if obj_op == 'finalized': + terminate_it = True + break + + if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec: + terminate_it = True + + if terminate_it: + cli.state = 2 + cli.kill() + + if all ([cli.state == 2 for cli in self.clis]): + break + + #finalizing host logic and return result + self.on_clients_finalized() + + return self.get_result() diff --git a/joblib/__init__.py b/joblib/__init__.py index 651050a..fbbc20c 100644 --- a/joblib/__init__.py +++ b/joblib/__init__.py @@ -1,2 +1,2 @@ -from .SubprocessorBase import Subprocessor -from .SubprocessFunctionCaller import SubprocessFunctionCaller +from .SubprocessorBase import Subprocessor +from .SubprocessFunctionCaller import SubprocessFunctionCaller diff --git a/localization/__init__.py b/localization/__init__.py index 4e77f1f..f3bcf09 100644 --- a/localization/__init__.py +++ b/localization/__init__.py @@ -1,2 +1,2 @@ -from .localization import get_default_ttf_font_name - +from .localization import get_default_ttf_font_name + diff --git a/localization/localization.py b/localization/localization.py index 01f1d1e..82fa575 100644 --- a/localization/localization.py +++ b/localization/localization.py @@ -1,30 +1,30 @@ -import sys -import locale - -system_locale = locale.getdefaultlocale()[0] -# system_locale may be nil -system_language = system_locale[0:2] if system_locale is not None else "en" - -windows_font_name_map = { - 'en' : 'cour', - 'ru' : 'cour', - 'zn' : 'simsun_01' -} - -darwin_font_name_map = { - 'en' : 'cour', - 'ru' : 'cour', - 'zn' : 'Apple LiSung Light' -} - -linux_font_name_map = { - 'en' : 'cour', - 'ru' : 'cour', - 'zn' : 'cour' -} - -def get_default_ttf_font_name(): - platform = sys.platform - if platform == 'win32': return windows_font_name_map.get(system_language, 'cour') - elif platform == 'darwin': return darwin_font_name_map.get(system_language, 'cour') - else: return linux_font_name_map.get(system_language, 'cour') +import sys +import locale + +system_locale = locale.getdefaultlocale()[0] +# system_locale may be nil +system_language = system_locale[0:2] if system_locale is not None else "en" + +windows_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'simsun_01' +} + +darwin_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'Apple LiSung Light' +} + +linux_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'cour' +} + +def get_default_ttf_font_name(): + platform = sys.platform + if platform == 'win32': return windows_font_name_map.get(system_language, 'cour') + elif platform == 'darwin': return darwin_font_name_map.get(system_language, 'cour') + else: return linux_font_name_map.get(system_language, 'cour') diff --git a/main.py b/main.py index 6f80fb1..3b0a987 100644 --- a/main.py +++ b/main.py @@ -1,278 +1,278 @@ -import os -import sys -import time -import argparse -import multiprocessing -from utils import Path_utils -from utils import os_utils -from pathlib import Path - -train_args = 
r'python3 main.py train --training-data-src-dir /media/user/5246EBF746EBD9AD/dfl/DFL/workspace/data_src/aligned/ --training-data-dst-dir /media/user/5246EBF746EBD9AD/dfl/DFL/workspace/data_dst/aligned/ --model-dir /media/user/5246EBF746EBD9AD/generic-fs/128h-sae-liaf/ --model SAE' - -if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6): - raise Exception("This program requires at least Python 3.6") - -class fixPathAction(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values))) - -if __name__ == "__main__": - multiprocessing.set_start_method("spawn") - - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers() - - def process_extract(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Extractor - Extractor.main( arguments.input_dir, - arguments.output_dir, - arguments.debug_dir, - arguments.detector, - arguments.manual_fix, - arguments.manual_output_debug_fix, - arguments.manual_window_size, - face_type=arguments.face_type, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } - ) - - p = subparsers.add_parser( "extract", help="Extract the faces from a pictures.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.") - p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.") - p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'") - p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") - p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.") - p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.") - p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. 
Forces to use MT extractor.") - p.set_defaults (func=process_extract) - - - def process_dev_extract_umd_csv(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Extractor - Extractor.extract_umd_csv( arguments.input_csv_file, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } - ) - - p = subparsers.add_parser( "dev_extract_umd_csv", help="") - p.add_argument('--input-csv-file', required=True, action=fixPathAction, dest="input_csv_file", help="input_csv_file") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.") - p.set_defaults (func=process_dev_extract_umd_csv) - """ - def process_extract_fanseg(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Extractor - Extractor.extract_fanseg( arguments.input_dir, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } - ) - - p = subparsers.add_parser( "extract_fanseg", help="Extract fanseg mask from faces.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.") - p.set_defaults (func=process_extract_fanseg) - """ - - def process_sort(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Sorter - Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method) - - p = subparsers.add_parser( "sort", help="Sort faces in a directory.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "final-no-blur", "test"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." ) - p.set_defaults (func=process_sort) - - def process_util(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import Util - - if arguments.convert_png_to_jpg: - Util.convert_png_to_jpg_folder (input_path=arguments.input_dir) - - if arguments.add_landmarks_debug_images: - Util.add_landmarks_debug_images (input_path=arguments.input_dir) - - if arguments.recover_original_aligned_filename: - Util.recover_original_aligned_filename (input_path=arguments.input_dir) - - #if arguments.remove_fanseg: - # Util.remove_fanseg_folder (input_path=arguments.input_dir) - - p = subparsers.add_parser( "util", help="Utilities.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.") - p.add_argument('--convert-png-to-jpg', action="store_true", dest="convert_png_to_jpg", default=False, help="Convert DeepFaceLAB PNG files to JPEG.") - p.add_argument('--add-landmarks-debug-images', action="store_true", dest="add_landmarks_debug_images", default=False, help="Add landmarks debug image for aligned faces.") - p.add_argument('--recover-original-aligned-filename', action="store_true", dest="recover_original_aligned_filename", default=False, help="Recover original aligned filename.") - #p.add_argument('--remove-fanseg', action="store_true", dest="remove_fanseg", default=False, help="Remove fanseg mask from aligned faces.") - - p.set_defaults (func=process_util) - - def process_train(arguments): - os_utils.set_process_lowest_prio() - args = {'training_data_src_dir' : arguments.training_data_src_dir, - 'training_data_dst_dir' : arguments.training_data_dst_dir, - 'pretraining_data_dir' : arguments.pretraining_data_dir, - 'model_path' : arguments.model_dir, - 'model_name' : arguments.model_name, - 'no_preview' : arguments.no_preview, - 'debug' : arguments.debug, - 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ] - } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } - from mainscripts import Trainer - Trainer.main(args, device_args) - - p = subparsers.add_parser( "train", help="Trainer") - p.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of extracted SRC faceset.") - p.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of extracted DST faceset.") - p.add_argument('--pretraining-data-dir', action=fixPathAction, dest="pretraining_data_dir", default=None, help="Optional dir of extracted faceset that will be used in pretraining mode.") - p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") - p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") - p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+') - p.set_defaults (func=process_train) - - def process_convert(arguments): - os_utils.set_process_lowest_prio() - args = {'input_dir' : arguments.input_dir, - 'output_dir' : arguments.output_dir, - 'aligned_dir' : arguments.aligned_dir, - 'avaperator_aligned_dir' : arguments.avaperator_aligned_dir, - 'model_dir' : arguments.model_dir, - 'model_name' : arguments.model_name, - 'debug' : arguments.debug, - } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } - from mainscripts import Converter - Converter.main (args, device_args) - - p = subparsers.add_parser( "convert", help="Converter") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
A directory containing the files you wish to process.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.") - p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted of dst faces stored.") - p.add_argument('--avaperator-aligned-dir', action=fixPathAction, dest="avaperator_aligned_dir", help="Only for AVATAR model. Directory of aligned avatar operator faces.") - p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.") - p.set_defaults(func=process_convert) - - videoed_parser = subparsers.add_parser( "videoed", help="Video processing.").add_subparsers() - - def process_videoed_extract_video(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.extract_video (arguments.input_file, arguments.output_dir, arguments.output_ext, arguments.fps) - p = videoed_parser.add_parser( "extract-video", help="Extract images from video file.") - p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted images will be stored.") - p.add_argument('--output-ext', dest="output_ext", default=None, help="Image format (extension) of output files.") - p.add_argument('--fps', type=int, dest="fps", default=None, help="How many frames of every second of the video will be extracted. 0 - full fps.") - p.set_defaults(func=process_videoed_extract_video) - - def process_videoed_cut_video(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.cut_video (arguments.input_file, - arguments.from_time, - arguments.to_time, - arguments.audio_track_id, - arguments.bitrate) - p = videoed_parser.add_parser( "cut-video", help="Cut video file.") - p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. 
Specify .*-extension to find first file.") - p.add_argument('--from-time', dest="from_time", default=None, help="From time, for example 00:00:00.000") - p.add_argument('--to-time', dest="to_time", default=None, help="To time, for example 00:00:00.000") - p.add_argument('--audio-track-id', type=int, dest="audio_track_id", default=None, help="Specify audio track id.") - p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.") - p.set_defaults(func=process_videoed_cut_video) - - def process_videoed_denoise_image_sequence(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.denoise_image_sequence (arguments.input_dir, arguments.ext, arguments.factor) - p = videoed_parser.add_parser( "denoise-image-sequence", help="Denoise sequence of images, keeping sharp edges. This allows you to make the final fake more believable, since the neural network is not able to make a detailed skin texture, but it makes the edges quite clear. Therefore, if the whole frame is more `blurred`, then a fake will seem more believable. Especially true for scenes of the film, which are usually very clear.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.") - p.add_argument('--factor', type=int, dest="factor", default=None, help="Denoise factor (1-20).") - p.set_defaults(func=process_videoed_denoise_image_sequence) - - def process_videoed_video_from_sequence(arguments): - os_utils.set_process_lowest_prio() - from mainscripts import VideoEd - VideoEd.video_from_sequence (arguments.input_dir, - arguments.output_file, - arguments.reference_file, - arguments.ext, - arguments.fps, - arguments.bitrate, - arguments.lossless) - - p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--output-file', required=True, action=fixPathAction, dest="output_file", help="Input file to be processed. Specify .*-extension to find first file.") - p.add_argument('--reference-file', action=fixPathAction, dest="reference_file", help="Reference file used to determine proper FPS and transfer audio from it. Specify .*-extension to find first file.") - p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.") - p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of output file. 
Overwritten by reference-file.") - p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.") - p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="PNG codec.") - p.set_defaults(func=process_videoed_video_from_sequence) - - def process_labelingtool_edit_mask(arguments): - from mainscripts import MaskEditorTool - MaskEditorTool.mask_editor_main (arguments.input_dir, arguments.confirmed_dir, arguments.skipped_dir) - - labeling_parser = subparsers.add_parser( "labelingtool", help="Labeling tool.").add_subparsers() - p = labeling_parser.add_parser ( "edit_mask", help="") - p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory of aligned faces.") - p.add_argument('--confirmed-dir', required=True, action=fixPathAction, dest="confirmed_dir", help="This is where the labeled faces will be stored.") - p.add_argument('--skipped-dir', required=True, action=fixPathAction, dest="skipped_dir", help="This is where the labeled faces will be stored.") - p.set_defaults(func=process_labelingtool_edit_mask) - - def bad_args(arguments): - parser.print_help() - exit(0) - parser.set_defaults(func=bad_args) - - arguments = parser.parse_args() - arguments.func(arguments) - - print ("Done.") - - """ - Suppressing error with keras 2.2.4+ on python exit: - - Exception ignored in: > - Traceback (most recent call last): - File "D:\DeepFaceLab\_internal\bin\lib\site-packages\tensorflow\python\client\session.py", line 1413, in __del__ - AttributeError: 'NoneType' object has no attribute 'raise_exception_on_not_ok_status' - - reproduce: https://github.com/keras-team/keras/issues/11751 ( still no solution ) - """ - outnull_file = open(os.devnull, 'w') - os.dup2 ( outnull_file.fileno(), sys.stderr.fileno() ) - sys.stderr = outnull_file - - -''' -import code -code.interact(local=dict(globals(), **locals())) -''' +import os +import sys +import time +import argparse +import multiprocessing +from utils import Path_utils +from utils import os_utils +from pathlib import Path + +train_args = r'python3 main.py train --training-data-src-dir /media/user/5246EBF746EBD9AD/dfl/DFL/workspace/data_src/aligned/ --training-data-dst-dir /media/user/5246EBF746EBD9AD/dfl/DFL/workspace/data_dst/aligned/ --model-dir /media/user/5246EBF746EBD9AD/generic-fs/128h-sae-liaf/ --model SAE' + +if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6): + raise Exception("This program requires at least Python 3.6") + +class fixPathAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values))) + +if __name__ == "__main__": + multiprocessing.set_start_method("spawn") + + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers() + + def process_extract(arguments): + os_utils.set_process_lowest_prio() + from mainscripts import Extractor + Extractor.main( arguments.input_dir, + arguments.output_dir, + arguments.debug_dir, + arguments.detector, + arguments.manual_fix, + arguments.manual_output_debug_fix, + arguments.manual_window_size, + face_type=arguments.face_type, + device_args={'cpu_only' : arguments.cpu_only, + 'multi_gpu' : arguments.multi_gpu, + } + ) + + p = subparsers.add_parser( "extract", help="Extract the faces from a pictures.") + p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
+    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
+    p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.")
+    p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option; currently all models use 'full_face'.")
+    p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands of faces for the src-set. It is also good for the dst-set, but can generate false faces in frames where the main face is not recognized! In that case, for the dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. The manual detector is suitable only for the dst-set.")
+    p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
+    p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extraction of only those frames where faces were not recognized.")
+    p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual re-extraction of input-dir frames that were deleted from the [output_dir]_debug\ dir.")
+    p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. Forces use of the MT extractor.")
+    p.set_defaults (func=process_extract)
+
+
+    def process_dev_extract_umd_csv(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Extractor
+        Extractor.extract_umd_csv( arguments.input_csv_file,
+                                   device_args={'cpu_only'  : arguments.cpu_only,
+                                                'multi_gpu' : arguments.multi_gpu,
+                                               }
+                                 )
+
+    p = subparsers.add_parser( "dev_extract_umd_csv", help="")
+    p.add_argument('--input-csv-file', required=True, action=fixPathAction, dest="input_csv_file", help="input_csv_file")
+    p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.")
+    p.set_defaults (func=process_dev_extract_umd_csv)
+    """
+    def process_extract_fanseg(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Extractor
+        Extractor.extract_fanseg( arguments.input_dir,
+                                  device_args={'cpu_only'  : arguments.cpu_only,
+                                               'multi_gpu' : arguments.multi_gpu,
+                                              }
+                                )

+    p = subparsers.add_parser( "extract_fanseg", help="Extract fanseg mask from faces.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
+    p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU.")
+    p.set_defaults (func=process_extract_fanseg)
+    """
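A note on the path handling used by nearly every option above: fixPathAction, defined at the top of the new main.py, normalizes each path argument at parse time by expanding '~' and resolving to an absolute path, so the mainscripts modules never have to deal with relative paths. A minimal sketch of that behavior; the demo parser and example path are hypothetical, not part of the patch:

    import argparse
    import os

    class fixPathAction(argparse.Action):
        def __call__(self, parser, namespace, values, option_string=None):
            # expanduser resolves '~'; abspath resolves against the current working directory
            setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values)))

    demo = argparse.ArgumentParser()
    demo.add_argument('--input-dir', action=fixPathAction, dest="input_dir")
    print(demo.parse_args(['--input-dir', '~/workspace/data_src']).input_dir)
    # prints an absolute path, e.g. /home/user/workspace/data_src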
+
+    def process_sort(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Sorter
+        Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method)
+
+    p = subparsers.add_parser( "sort", help="Sort faces in a directory.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
+    p.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "final-no-blur", "test"), help="Method of sorting. 'origname' sorts by original filename to recover the original sequence." )
+    p.set_defaults (func=process_sort)
+
+    def process_util(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import Util
+
+        if arguments.convert_png_to_jpg:
+            Util.convert_png_to_jpg_folder (input_path=arguments.input_dir)
+
+        if arguments.add_landmarks_debug_images:
+            Util.add_landmarks_debug_images (input_path=arguments.input_dir)
+
+        if arguments.recover_original_aligned_filename:
+            Util.recover_original_aligned_filename (input_path=arguments.input_dir)
+
+        #if arguments.remove_fanseg:
+        #    Util.remove_fanseg_folder (input_path=arguments.input_dir)
+
+    p = subparsers.add_parser( "util", help="Utilities.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
+    p.add_argument('--convert-png-to-jpg', action="store_true", dest="convert_png_to_jpg", default=False, help="Convert DeepFaceLab PNG files to JPEG.")
+    p.add_argument('--add-landmarks-debug-images', action="store_true", dest="add_landmarks_debug_images", default=False, help="Add landmarks debug images for aligned faces.")
+    p.add_argument('--recover-original-aligned-filename', action="store_true", dest="recover_original_aligned_filename", default=False, help="Recover original aligned filename.")
+    #p.add_argument('--remove-fanseg', action="store_true", dest="remove_fanseg", default=False, help="Remove fanseg mask from aligned faces.")
+
+    p.set_defaults (func=process_util)
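Since process_util above tests each flag independently, several util operations can be combined in a single run. A hypothetical invocation in the same style as the train_args string near the top of the file (paths are placeholders):

    util_args = r'python3 main.py util --input-dir /path/to/aligned --convert-png-to-jpg --add-landmarks-debug-images'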
+
+    def process_train(arguments):
+        os_utils.set_process_lowest_prio()
+        args = {'training_data_src_dir' : arguments.training_data_src_dir,
+                'training_data_dst_dir' : arguments.training_data_dst_dir,
+                'pretraining_data_dir'  : arguments.pretraining_data_dir,
+                'model_path'            : arguments.model_dir,
+                'model_name'            : arguments.model_name,
+                'no_preview'            : arguments.no_preview,
+                'debug'                 : arguments.debug,
+                'execute_programs'      : [ [int(x[0]), x[1] ] for x in arguments.execute_program ]
+                }
+        device_args = {'cpu_only'      : arguments.cpu_only,
+                       'force_gpu_idx' : arguments.force_gpu_idx,
+                      }
+        from mainscripts import Trainer
+        Trainer.main(args, device_args)
+
+    p = subparsers.add_parser( "train", help="Trainer")
+    p.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of extracted SRC faceset.")
+    p.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of extracted DST faceset.")
+    p.add_argument('--pretraining-data-dir', action=fixPathAction, dest="pretraining_data_dir", default=None, help="Optional dir of an extracted faceset that will be used in pretraining mode.")
+    p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
+    p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model")
+    p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.")
+    p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.")
+    p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force the use of this GPU idx.")
+    p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+')
+    p.set_defaults (func=process_train)
+
+    def process_convert(arguments):
+        os_utils.set_process_lowest_prio()
+        args = {'input_dir'   : arguments.input_dir,
+                'output_dir'  : arguments.output_dir,
+                'aligned_dir' : arguments.aligned_dir,
+                'avaperator_aligned_dir' : arguments.avaperator_aligned_dir,
+                'model_dir'   : arguments.model_dir,
+                'model_name'  : arguments.model_name,
+                'debug'       : arguments.debug,
+                }
+        device_args = {'cpu_only'      : arguments.cpu_only,
+                       'force_gpu_idx' : arguments.force_gpu_idx,
+                      }
+        from mainscripts import Converter
+        Converter.main (args, device_args)
+
+    p = subparsers.add_parser( "convert", help="Converter")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
+    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.")
+    p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted dst faces are stored.")
+    p.add_argument('--avaperator-aligned-dir', action=fixPathAction, dest="avaperator_aligned_dir", help="Only for the AVATAR model. Directory of aligned avatar operator faces.")
+    p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
+    p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model")
+    p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.")
+    p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force the use of this GPU idx.")
+    p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.")
+    p.set_defaults(func=process_convert)
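One non-obvious detail in the train subcommand above: --execute-program is declared with action='append' and nargs='+', so argparse collects every occurrence as its own list of strings, and process_train then converts each to [int(x[0]), x[1]], presumably an interval in seconds plus a command string. A minimal sketch of the parsing; the values are hypothetical:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+')
    ns = p.parse_args(['--execute-program', '3600', 'echo hourly',
                       '--execute-program', '60',   'echo minutely'])
    # ns.execute_program == [['3600', 'echo hourly'], ['60', 'echo minutely']]
    programs = [ [int(x[0]), x[1]] for x in ns.execute_program ]
    # programs == [[3600, 'echo hourly'], [60, 'echo minutely']]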
+
+    videoed_parser = subparsers.add_parser( "videoed", help="Video processing.").add_subparsers()
+
+    def process_videoed_extract_video(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.extract_video (arguments.input_file, arguments.output_dir, arguments.output_ext, arguments.fps)
+    p = videoed_parser.add_parser( "extract-video", help="Extract images from video file.")
+    p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. Specify .*-extension to find first file.")
+    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted images will be stored.")
+    p.add_argument('--output-ext', dest="output_ext", default=None, help="Image format (extension) of output files.")
+    p.add_argument('--fps', type=int, dest="fps", default=None, help="How many frames per second to extract from the video. 0 - full fps.")
+    p.set_defaults(func=process_videoed_extract_video)
+
+    def process_videoed_cut_video(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.cut_video (arguments.input_file,
+                           arguments.from_time,
+                           arguments.to_time,
+                           arguments.audio_track_id,
+                           arguments.bitrate)
+    p = videoed_parser.add_parser( "cut-video", help="Cut video file.")
+    p.add_argument('--input-file', required=True, action=fixPathAction, dest="input_file", help="Input file to be processed. Specify .*-extension to find first file.")
+    p.add_argument('--from-time', dest="from_time", default=None, help="From time, for example 00:00:00.000")
+    p.add_argument('--to-time', dest="to_time", default=None, help="To time, for example 00:00:00.000")
+    p.add_argument('--audio-track-id', type=int, dest="audio_track_id", default=None, help="Specify audio track id.")
+    p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
+    p.set_defaults(func=process_videoed_cut_video)
+
+    def process_videoed_denoise_image_sequence(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.denoise_image_sequence (arguments.input_dir, arguments.ext, arguments.factor)
+    p = videoed_parser.add_parser( "denoise-image-sequence", help="Denoise a sequence of images while keeping sharp edges. This can make the final fake more believable: the neural network cannot reproduce detailed skin texture, but it does produce clear edges, so a slightly blurred frame makes the fake look more natural. This is especially true for film scenes, which are usually very sharp.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the image sequence you wish to process.")
+    p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.")
+    p.add_argument('--factor', type=int, dest="factor", default=None, help="Denoise factor (1-20).")
+    p.set_defaults(func=process_videoed_denoise_image_sequence)
+
+    def process_videoed_video_from_sequence(arguments):
+        os_utils.set_process_lowest_prio()
+        from mainscripts import VideoEd
+        VideoEd.video_from_sequence (arguments.input_dir,
+                                     arguments.output_file,
+                                     arguments.reference_file,
+                                     arguments.ext,
+                                     arguments.fps,
+                                     arguments.bitrate,
+                                     arguments.lossless)
+
+    p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.")
+    p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the image sequence you wish to process.")
+    p.add_argument('--output-file', required=True, action=fixPathAction, dest="output_file", help="Output video file that will be created.")
+    p.add_argument('--reference-file', action=fixPathAction, dest="reference_file", help="Reference file used to determine proper FPS and to transfer audio from it. Specify .*-extension to find first file.")
+    p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.")
+    p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of output file. Overridden by reference-file.")
+    p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
+    p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="Use lossless PNG codec.")
+    p.set_defaults(func=process_videoed_video_from_sequence)
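Taken together, the videoed subcommands form the usual frame round trip: extract-video splits a clip into an image sequence, and video-from-sequence reassembles it, with --reference-file supplying the FPS and the audio track. A hypothetical pair of invocations in the train_args style (paths are placeholders):

    extract_args  = r'python3 main.py videoed extract-video --input-file /path/to/data_dst.mp4 --output-dir /path/to/frames --output-ext png'
    assemble_args = r'python3 main.py videoed video-from-sequence --input-dir /path/to/frames --output-file /path/to/result.mp4 --reference-file /path/to/data_dst.mp4'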
Overwritten by reference-file.") + p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.") + p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="PNG codec.") + p.set_defaults(func=process_videoed_video_from_sequence) + + def process_labelingtool_edit_mask(arguments): + from mainscripts import MaskEditorTool + MaskEditorTool.mask_editor_main (arguments.input_dir, arguments.confirmed_dir, arguments.skipped_dir) + + labeling_parser = subparsers.add_parser( "labelingtool", help="Labeling tool.").add_subparsers() + p = labeling_parser.add_parser ( "edit_mask", help="") + p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory of aligned faces.") + p.add_argument('--confirmed-dir', required=True, action=fixPathAction, dest="confirmed_dir", help="This is where the labeled faces will be stored.") + p.add_argument('--skipped-dir', required=True, action=fixPathAction, dest="skipped_dir", help="This is where the labeled faces will be stored.") + p.set_defaults(func=process_labelingtool_edit_mask) + + def bad_args(arguments): + parser.print_help() + exit(0) + parser.set_defaults(func=bad_args) + + arguments = parser.parse_args() + arguments.func(arguments) + + print ("Done.") + + """ + Suppressing error with keras 2.2.4+ on python exit: + + Exception ignored in: > + Traceback (most recent call last): + File "D:\DeepFaceLab\_internal\bin\lib\site-packages\tensorflow\python\client\session.py", line 1413, in __del__ + AttributeError: 'NoneType' object has no attribute 'raise_exception_on_not_ok_status' + + reproduce: https://github.com/keras-team/keras/issues/11751 ( still no solution ) + """ + outnull_file = open(os.devnull, 'w') + os.dup2 ( outnull_file.fileno(), sys.stderr.fileno() ) + sys.stderr = outnull_file + + +''' +import code +code.interact(local=dict(globals(), **locals())) +''' diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py index 7d21748..0439914 100644 --- a/mainscripts/Converter.py +++ b/mainscripts/Converter.py @@ -1,391 +1,391 @@ -import sys -import multiprocessing -import operator -import os -import shutil -import time -import traceback -from pathlib import Path - -import cv2 -import numpy as np - -from converters import Converter -from interact import interact as io -from joblib import SubprocessFunctionCaller, Subprocessor -from utils import Path_utils -from utils.cv2_utils import * -from utils.DFLJPG import DFLJPG -from utils.DFLPNG import DFLPNG -from imagelib import normalize_channels - -class ConvertSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - io.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - self.device_idx = client_dict['device_idx'] - self.device_name = client_dict['device_name'] - self.converter = client_dict['converter'] - self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None - self.alignments = client_dict['alignments'] - self.avatar_image_paths = client_dict['avatar_image_paths'] - self.debug = client_dict['debug'] - - #transfer and set stdin in order to work code.interact in debug subprocess - stdin_fd = client_dict['stdin_fd'] - if stdin_fd is not None: - sys.stdin = os.fdopen(stdin_fd) - - from nnlib import nnlib - #model process ate all GPU mem, - #so we cannot use GPU for any TF operations in converter processes - #therefore forcing active_DeviceConfig to CPU only - nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True) - - self.converter.on_cli_initialize() - - return None - - #override - def process_data(self, data): - idx, filename = data - filename_path = Path(filename) - files_processed = 1 - faces_processed = 0 - - output_filename_path = self.output_path / (filename_path.stem + '.png') - - if (self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR ) \ - and filename_path.stem not in self.alignments.keys(): - if not self.debug: - self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) ) - - if filename_path.suffix == '.png': - shutil.copy ( str(filename_path), str(output_filename_path) ) - else: - image = cv2_imread(str(filename_path)) - cv2_imwrite ( str(output_filename_path), image ) - else: - image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32) - image = normalize_channels (image, 3) - - if self.converter.type == Converter.TYPE_IMAGE: - image = self.converter.cli_convert_image(image, None, self.debug) - - if self.debug: - return (1, image) - - faces_processed = 1 - - elif self.converter.type == Converter.TYPE_IMAGE_WITH_LANDMARKS: - #currently unused - if filename_path.suffix == '.png': - dflimg = DFLPNG.load( str(filename_path) ) - elif filename_path.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filename_path) ) - else: - dflimg = None - - if dflimg is not None: - image_landmarks = dflimg.get_landmarks() - - image = self.converter.convert_image(image, image_landmarks, self.debug) - - if self.debug: - raise NotImplementedError - #for img in image: - # io.show_image ('Debug convert', img ) - # cv2.waitKey(0) - faces_processed = 1 - else: - self.log_err ("%s is not a dfl image file" % (filename_path.name) ) - - elif self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR: - - ava_face = None - if self.converter.type == Converter.TYPE_FACE_AVATAR: - ava_filename_path = self.avatar_image_paths[idx] - ava_face = (cv2_imread(str(ava_filename_path)) / 255.0).astype(np.float32) - ava_face = normalize_channels (ava_face, 3) - faces = self.alignments[filename_path.stem] - - if self.debug: - debug_images = [] - - for face_num, image_landmarks in enumerate(faces): - try: - if self.debug: - self.log_info ( '\nConverting face_num [%d] in file [%s]' % (face_num, filename_path) ) - - if self.debug: - debug_images += self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face) - else: - image = self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face) - - except Exception as e: - e_str = traceback.format_exc() - if 'MemoryError' in e_str: - raise Subprocessor.SilenceException - else: - 
raise Exception( 'Error while converting face_num [%d] in file [%s]: %s' % (face_num, filename_path, e_str) ) - - if self.debug: - return (1, debug_images) - - faces_processed = len(faces) - - if not self.debug: - cv2_imwrite (str(output_filename_path), (image*255).astype(np.uint8) ) - - - return (0, files_processed, faces_processed) - - #overridable - def get_data_name (self, data): - #return string identificator of your data - idx, filename = data - return filename - - #override - def __init__(self, converter, input_path_image_paths, output_path, alignments, avatar_image_paths=None, debug = False): - super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if debug == True else 60) - - self.converter = converter - self.input_data = self.input_path_image_paths = input_path_image_paths - self.input_data_idxs = [ *range(len(self.input_data)) ] - self.output_path = output_path - self.alignments = alignments - self.avatar_image_paths = avatar_image_paths - self.debug = debug - - self.files_processed = 0 - self.faces_processed = 0 - - #override - def process_info_generator(self): - r = [0] if self.debug else range( min(6,multiprocessing.cpu_count()) ) - - for i in r: - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'converter' : self.converter, - 'output_dir' : str(self.output_path), - 'alignments' : self.alignments, - 'avatar_image_paths' : self.avatar_image_paths, - 'debug': self.debug, - 'stdin_fd': sys.stdin.fileno() if self.debug else None - } - - #overridable optional - def on_clients_initialized(self): - if self.debug: - io.named_window ("Debug convert") - - io.progress_bar ("Converting", len (self.input_data_idxs) ) - - #overridable optional - def on_clients_finalized(self): - io.progress_bar_close() - - if self.debug: - io.destroy_all_windows() - - #override - def get_data(self, host_dict): - if len (self.input_data_idxs) > 0: - idx = self.input_data_idxs.pop(0) - return (idx, self.input_data[idx]) - return None - - #override - def on_data_return (self, host_dict, data): - idx, filename = data - self.input_data_idxs.insert(0, idx) - - #override - def on_result (self, host_dict, data, result): - if result[0] == 0: - self.files_processed += result[0] - self.faces_processed += result[1] - elif result[0] == 1: - for img in result[1]: - io.show_image ('Debug convert', (img*255).astype(np.uint8) ) - io.wait_any_key() - io.progress_bar_inc(1) - - #override - def on_tick(self): - self.converter.on_host_tick() - - #override - def get_result(self): - return self.files_processed, self.faces_processed - -def main (args, device_args): - io.log_info ("Running converter.\r\n") - - aligned_dir = args.get('aligned_dir', None) - avaperator_aligned_dir = args.get('avaperator_aligned_dir', None) - - try: - input_path = Path(args['input_dir']) - output_path = Path(args['output_dir']) - model_path = Path(args['model_dir']) - - if not input_path.exists(): - io.log_err('Input directory not found. Please ensure it exists.') - return - - if output_path.exists(): - for filename in Path_utils.get_image_paths(output_path): - Path(filename).unlink() - else: - output_path.mkdir(parents=True, exist_ok=True) - - if not model_path.exists(): - io.log_err('Model directory not found. 
Please ensure it exists.') - return - - import models - model = models.import_model( args['model_name'] )(model_path, device_args=device_args) - converter = model.get_converter() - - input_path_image_paths = Path_utils.get_image_paths(input_path) - alignments = None - avatar_image_paths = None - if converter.type == Converter.TYPE_FACE or converter.type == Converter.TYPE_FACE_AVATAR: - if aligned_dir is None: - io.log_err('Aligned directory not found. Please ensure it exists.') - return - - aligned_path = Path(aligned_dir) - if not aligned_path.exists(): - io.log_err('Aligned directory not found. Please ensure it exists.') - return - - alignments = {} - - aligned_path_image_paths = Path_utils.get_image_paths(aligned_path) - for filepath in io.progress_bar_generator(aligned_path_image_paths, "Collecting alignments"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - - source_filename_stem = Path( dflimg.get_source_filename() ).stem - if source_filename_stem not in alignments.keys(): - alignments[ source_filename_stem ] = [] - - alignments[ source_filename_stem ].append (dflimg.get_source_landmarks()) - - - if converter.type == Converter.TYPE_FACE_AVATAR: - if avaperator_aligned_dir is None: - io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') - return - - avaperator_aligned_path = Path(avaperator_aligned_dir) - if not avaperator_aligned_path.exists(): - io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') - return - - avatar_image_paths = [] - for filename in io.progress_bar_generator( Path_utils.get_image_paths(avaperator_aligned_path) , "Sorting avaperator faces"): - filepath = Path(filename) - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("Fatal error: %s is not a dfl image file" % (filepath.name) ) - return - - avatar_image_paths += [ (filename, dflimg.get_source_filename() ) ] - avatar_image_paths = [ p[0] for p in sorted(avatar_image_paths, key=operator.itemgetter(1)) ] - - if len(input_path_image_paths) < len(avatar_image_paths): - io.log_err("Input faces count must be >= avatar operator faces count.") - return - - files_processed, faces_processed = ConvertSubprocessor ( - converter = converter, - input_path_image_paths = input_path_image_paths, - output_path = output_path, - alignments = alignments, - avatar_image_paths = avatar_image_paths, - debug = args.get('debug',False) - ).run() - - model.finalize() - - except Exception as e: - print ( 'Error: %s' % (str(e))) - traceback.print_exc() - -''' -if model_name == 'AVATAR': - output_path_image_paths = Path_utils.get_image_paths(output_path) - - last_ok_frame = -1 - for filename in output_path_image_paths: - filename_path = Path(filename) - stem = Path(filename).stem - try: - frame = int(stem) - except: - raise Exception ('Aligned avatars must be created from indexed sequence files.') - - if frame-last_ok_frame > 1: - start = last_ok_frame + 1 - end = frame - 1 - - print ("Filling gaps: [%d...%d]" % (start, end) ) - for i in range (start, end+1): - shutil.copy ( str(filename), str( output_path / ('%.5d%s' % (i, filename_path.suffix )) ) ) - - last_ok_frame = frame -''' 
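For orientation, ConvertSubprocessor above follows the host/worker pattern of joblib.Subprocessor used throughout DeepFaceLab: the host hands out (idx, filename) work items through get_data, each CPU-only worker converts one frame in process_data, and results flow back through on_result, which also advances the progress bar. A rough standalone sketch of that round trip using plain multiprocessing; the queue-based mini-framework and the echo worker are illustrative only, not the actual joblib API:

    import multiprocessing as mp

    def worker(task_q, result_q):
        # pull (idx, filename) items until the poison pill arrives
        while True:
            item = task_q.get()
            if item is None:
                break
            idx, filename = item
            result_q.put((idx, 'processed %s' % filename))

    if __name__ == "__main__":
        task_q, result_q = mp.Queue(), mp.Queue()
        workers = [mp.Process(target=worker, args=(task_q, result_q)) for _ in range(4)]
        for w in workers: w.start()
        files = ['0001.png', '0002.png', '0003.png']
        for item in enumerate(files): task_q.put(item)   # host side: get_data
        for _ in workers: task_q.put(None)
        for _ in files: print(result_q.get())            # host side: on_result
        for w in workers: w.join()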
-#interpolate landmarks -#from facelib import LandmarksProcessor -#from facelib import FaceType -#a = sorted(alignments.keys()) -#a_len = len(a) -# -#box_pts = 3 -#box = np.ones(box_pts)/box_pts -#for i in range( a_len ): -# if i >= box_pts and i <= a_len-box_pts-1: -# af0 = alignments[ a[i] ][0] ##first face -# m0 = LandmarksProcessor.get_transform_mat (af0, 256, face_type=FaceType.FULL) -# -# points = [] -# -# for j in range(-box_pts, box_pts+1): -# af = alignments[ a[i+j] ][0] ##first face -# m = LandmarksProcessor.get_transform_mat (af, 256, face_type=FaceType.FULL) -# p = LandmarksProcessor.transform_points (af, m) -# points.append (p) -# -# points = np.array(points) -# points_len = len(points) -# t_points = np.transpose(points, [1,0,2]) -# -# p1 = np.array ( [ int(np.convolve(x[:,0], box, mode='same')[points_len//2]) for x in t_points ] ) -# p2 = np.array ( [ int(np.convolve(x[:,1], box, mode='same')[points_len//2]) for x in t_points ] ) -# -# new_points = np.concatenate( [np.expand_dims(p1,-1),np.expand_dims(p2,-1)], -1 ) -# -# alignments[ a[i] ][0] = LandmarksProcessor.transform_points (new_points, m0, True).astype(np.int32) +import sys +import multiprocessing +import operator +import os +import shutil +import time +import traceback +from pathlib import Path + +import cv2 +import numpy as np + +from converters import Converter +from interact import interact as io +from joblib import SubprocessFunctionCaller, Subprocessor +from utils import Path_utils +from utils.cv2_utils import * +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG +from imagelib import normalize_channels + +class ConvertSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + io.log_info ('Running on %s.' 
% (client_dict['device_name']) ) + self.device_idx = client_dict['device_idx'] + self.device_name = client_dict['device_name'] + self.converter = client_dict['converter'] + self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None + self.alignments = client_dict['alignments'] + self.avatar_image_paths = client_dict['avatar_image_paths'] + self.debug = client_dict['debug'] + + #transfer and set stdin in order to work code.interact in debug subprocess + stdin_fd = client_dict['stdin_fd'] + if stdin_fd is not None: + sys.stdin = os.fdopen(stdin_fd) + + from nnlib import nnlib + #model process ate all GPU mem, + #so we cannot use GPU for any TF operations in converter processes + #therefore forcing active_DeviceConfig to CPU only + nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True) + + self.converter.on_cli_initialize() + + return None + + #override + def process_data(self, data): + idx, filename = data + filename_path = Path(filename) + files_processed = 1 + faces_processed = 0 + + output_filename_path = self.output_path / (filename_path.stem + '.png') + + if (self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR ) \ + and filename_path.stem not in self.alignments.keys(): + if not self.debug: + self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) ) + + if filename_path.suffix == '.png': + shutil.copy ( str(filename_path), str(output_filename_path) ) + else: + image = cv2_imread(str(filename_path)) + cv2_imwrite ( str(output_filename_path), image ) + else: + image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32) + image = normalize_channels (image, 3) + + if self.converter.type == Converter.TYPE_IMAGE: + image = self.converter.cli_convert_image(image, None, self.debug) + + if self.debug: + return (1, image) + + faces_processed = 1 + + elif self.converter.type == Converter.TYPE_IMAGE_WITH_LANDMARKS: + #currently unused + if filename_path.suffix == '.png': + dflimg = DFLPNG.load( str(filename_path) ) + elif filename_path.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filename_path) ) + else: + dflimg = None + + if dflimg is not None: + image_landmarks = dflimg.get_landmarks() + + image = self.converter.convert_image(image, image_landmarks, self.debug) + + if self.debug: + raise NotImplementedError + #for img in image: + # io.show_image ('Debug convert', img ) + # cv2.waitKey(0) + faces_processed = 1 + else: + self.log_err ("%s is not a dfl image file" % (filename_path.name) ) + + elif self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR: + + ava_face = None + if self.converter.type == Converter.TYPE_FACE_AVATAR: + ava_filename_path = self.avatar_image_paths[idx] + ava_face = (cv2_imread(str(ava_filename_path)) / 255.0).astype(np.float32) + ava_face = normalize_channels (ava_face, 3) + faces = self.alignments[filename_path.stem] + + if self.debug: + debug_images = [] + + for face_num, image_landmarks in enumerate(faces): + try: + if self.debug: + self.log_info ( '\nConverting face_num [%d] in file [%s]' % (face_num, filename_path) ) + + if self.debug: + debug_images += self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face) + else: + image = self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face) + + except Exception as e: + e_str = traceback.format_exc() + if 'MemoryError' in e_str: + raise Subprocessor.SilenceException + else: + 
raise Exception( 'Error while converting face_num [%d] in file [%s]: %s' % (face_num, filename_path, e_str) ) + + if self.debug: + return (1, debug_images) + + faces_processed = len(faces) + + if not self.debug: + cv2_imwrite (str(output_filename_path), (image*255).astype(np.uint8) ) + + + return (0, files_processed, faces_processed) + + #overridable + def get_data_name (self, data): + #return string identificator of your data + idx, filename = data + return filename + + #override + def __init__(self, converter, input_path_image_paths, output_path, alignments, avatar_image_paths=None, debug = False): + super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if debug == True else 60) + + self.converter = converter + self.input_data = self.input_path_image_paths = input_path_image_paths + self.input_data_idxs = [ *range(len(self.input_data)) ] + self.output_path = output_path + self.alignments = alignments + self.avatar_image_paths = avatar_image_paths + self.debug = debug + + self.files_processed = 0 + self.faces_processed = 0 + + #override + def process_info_generator(self): + r = [0] if self.debug else range( min(6,multiprocessing.cpu_count()) ) + + for i in r: + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + 'converter' : self.converter, + 'output_dir' : str(self.output_path), + 'alignments' : self.alignments, + 'avatar_image_paths' : self.avatar_image_paths, + 'debug': self.debug, + 'stdin_fd': sys.stdin.fileno() if self.debug else None + } + + #overridable optional + def on_clients_initialized(self): + if self.debug: + io.named_window ("Debug convert") + + io.progress_bar ("Converting", len (self.input_data_idxs) ) + + #overridable optional + def on_clients_finalized(self): + io.progress_bar_close() + + if self.debug: + io.destroy_all_windows() + + #override + def get_data(self, host_dict): + if len (self.input_data_idxs) > 0: + idx = self.input_data_idxs.pop(0) + return (idx, self.input_data[idx]) + return None + + #override + def on_data_return (self, host_dict, data): + idx, filename = data + self.input_data_idxs.insert(0, idx) + + #override + def on_result (self, host_dict, data, result): + if result[0] == 0: + self.files_processed += result[0] + self.faces_processed += result[1] + elif result[0] == 1: + for img in result[1]: + io.show_image ('Debug convert', (img*255).astype(np.uint8) ) + io.wait_any_key() + io.progress_bar_inc(1) + + #override + def on_tick(self): + self.converter.on_host_tick() + + #override + def get_result(self): + return self.files_processed, self.faces_processed + +def main (args, device_args): + io.log_info ("Running converter.\r\n") + + aligned_dir = args.get('aligned_dir', None) + avaperator_aligned_dir = args.get('avaperator_aligned_dir', None) + + try: + input_path = Path(args['input_dir']) + output_path = Path(args['output_dir']) + model_path = Path(args['model_dir']) + + if not input_path.exists(): + io.log_err('Input directory not found. Please ensure it exists.') + return + + if output_path.exists(): + for filename in Path_utils.get_image_paths(output_path): + Path(filename).unlink() + else: + output_path.mkdir(parents=True, exist_ok=True) + + if not model_path.exists(): + io.log_err('Model directory not found. 
Please ensure it exists.') + return + + import models + model = models.import_model( args['model_name'] )(model_path, device_args=device_args) + converter = model.get_converter() + + input_path_image_paths = Path_utils.get_image_paths(input_path) + alignments = None + avatar_image_paths = None + if converter.type == Converter.TYPE_FACE or converter.type == Converter.TYPE_FACE_AVATAR: + if aligned_dir is None: + io.log_err('Aligned directory not found. Please ensure it exists.') + return + + aligned_path = Path(aligned_dir) + if not aligned_path.exists(): + io.log_err('Aligned directory not found. Please ensure it exists.') + return + + alignments = {} + + aligned_path_image_paths = Path_utils.get_image_paths(aligned_path) + for filepath in io.progress_bar_generator(aligned_path_image_paths, "Collecting alignments"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + + source_filename_stem = Path( dflimg.get_source_filename() ).stem + if source_filename_stem not in alignments.keys(): + alignments[ source_filename_stem ] = [] + + alignments[ source_filename_stem ].append (dflimg.get_source_landmarks()) + + + if converter.type == Converter.TYPE_FACE_AVATAR: + if avaperator_aligned_dir is None: + io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') + return + + avaperator_aligned_path = Path(avaperator_aligned_dir) + if not avaperator_aligned_path.exists(): + io.log_err('Avatar operator aligned directory not found. Please ensure it exists.') + return + + avatar_image_paths = [] + for filename in io.progress_bar_generator( Path_utils.get_image_paths(avaperator_aligned_path) , "Sorting avaperator faces"): + filepath = Path(filename) + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("Fatal error: %s is not a dfl image file" % (filepath.name) ) + return + + avatar_image_paths += [ (filename, dflimg.get_source_filename() ) ] + avatar_image_paths = [ p[0] for p in sorted(avatar_image_paths, key=operator.itemgetter(1)) ] + + if len(input_path_image_paths) < len(avatar_image_paths): + io.log_err("Input faces count must be >= avatar operator faces count.") + return + + files_processed, faces_processed = ConvertSubprocessor ( + converter = converter, + input_path_image_paths = input_path_image_paths, + output_path = output_path, + alignments = alignments, + avatar_image_paths = avatar_image_paths, + debug = args.get('debug',False) + ).run() + + model.finalize() + + except Exception as e: + print ( 'Error: %s' % (str(e))) + traceback.print_exc() + +''' +if model_name == 'AVATAR': + output_path_image_paths = Path_utils.get_image_paths(output_path) + + last_ok_frame = -1 + for filename in output_path_image_paths: + filename_path = Path(filename) + stem = Path(filename).stem + try: + frame = int(stem) + except: + raise Exception ('Aligned avatars must be created from indexed sequence files.') + + if frame-last_ok_frame > 1: + start = last_ok_frame + 1 + end = frame - 1 + + print ("Filling gaps: [%d...%d]" % (start, end) ) + for i in range (start, end+1): + shutil.copy ( str(filename), str( output_path / ('%.5d%s' % (i, filename_path.suffix )) ) ) + + last_ok_frame = frame +''' 
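The commented-out 'interpolate landmarks' block below smooths each landmark coordinate with a box filter over neighboring frames to reduce jitter, transforming into an aligned 256px space first and back afterwards. A minimal standalone sketch of the same moving-average idea, assuming a list of (68, 2) landmark arrays for consecutive frames; the function name and window size are hypothetical:

    import numpy as np

    def smooth_landmarks(per_frame, box_pts=3):
        # per_frame: list of (68, 2) landmark arrays, one per consecutive frame
        box = np.ones(box_pts) / box_pts
        stacked = np.stack(per_frame).astype(np.float32)  # (frames, 68, 2)
        out = stacked.copy()
        for axis in range(2):                             # x and y independently
            for pt in range(stacked.shape[1]):            # each of the 68 points
                out[:, pt, axis] = np.convolve(stacked[:, pt, axis], box, mode='same')
        return out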
+#interpolate landmarks +#from facelib import LandmarksProcessor +#from facelib import FaceType +#a = sorted(alignments.keys()) +#a_len = len(a) +# +#box_pts = 3 +#box = np.ones(box_pts)/box_pts +#for i in range( a_len ): +# if i >= box_pts and i <= a_len-box_pts-1: +# af0 = alignments[ a[i] ][0] ##first face +# m0 = LandmarksProcessor.get_transform_mat (af0, 256, face_type=FaceType.FULL) +# +# points = [] +# +# for j in range(-box_pts, box_pts+1): +# af = alignments[ a[i+j] ][0] ##first face +# m = LandmarksProcessor.get_transform_mat (af, 256, face_type=FaceType.FULL) +# p = LandmarksProcessor.transform_points (af, m) +# points.append (p) +# +# points = np.array(points) +# points_len = len(points) +# t_points = np.transpose(points, [1,0,2]) +# +# p1 = np.array ( [ int(np.convolve(x[:,0], box, mode='same')[points_len//2]) for x in t_points ] ) +# p2 = np.array ( [ int(np.convolve(x[:,1], box, mode='same')[points_len//2]) for x in t_points ] ) +# +# new_points = np.concatenate( [np.expand_dims(p1,-1),np.expand_dims(p2,-1)], -1 ) +# +# alignments[ a[i] ][0] = LandmarksProcessor.transform_points (new_points, m0, True).astype(np.int32) diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index 56ea0c6..17a243d 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -1,873 +1,873 @@ -import traceback -import os -import sys -import time -import multiprocessing -import shutil -from pathlib import Path -import numpy as np -import mathlib -import imagelib -import cv2 -from utils import Path_utils -from utils.DFLPNG import DFLPNG -from utils.DFLJPG import DFLJPG -from utils.cv2_utils import * -import facelib -from facelib import FaceType -from facelib import LandmarksProcessor -from facelib import FANSegmentator -from nnlib import nnlib -from joblib import Subprocessor -from interact import interact as io - -class ExtractSubprocessor(Subprocessor): - class Data(object): - def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, final_output_files = None): - self.filename = filename - self.rects = rects or [] - self.rects_rotation = 0 - self.landmarks_accurate = landmarks_accurate - self.landmarks = landmarks or [] - self.pitch_yaw_roll = pitch_yaw_roll - self.final_output_files = final_output_files or [] - self.faces_detected = 0 - - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.type = client_dict['type'] - self.image_size = client_dict['image_size'] - self.face_type = client_dict['face_type'] - self.device_idx = client_dict['device_idx'] - self.cpu_only = client_dict['device_type'] == 'CPU' - self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None - self.debug_dir = client_dict['debug_dir'] - - self.cached_image = (None, None) - - self.e = None - device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True) - self.device_vram = device_config.gpu_vram_gb[0] - - intro_str = 'Running on %s.' % (client_dict['device_name']) - if not self.cpu_only and self.device_vram <= 2: - intro_str += " Recommended to close all programs using this device." 
- - self.log_info (intro_str) - - if 'rects' in self.type: - if self.type == 'rects-mt': - nnlib.import_all (device_config) - self.e = facelib.MTCExtractor() - elif self.type == 'rects-dlib': - nnlib.import_dlib (device_config) - self.e = facelib.DLIBExtractor(nnlib.dlib) - elif self.type == 'rects-s3fd': - nnlib.import_all (device_config) - self.e = facelib.S3FDExtractor() - else: - raise ValueError ("Wrong type.") - - if self.e is not None: - self.e.__enter__() - - elif self.type == 'landmarks': - nnlib.import_all (device_config) - self.e = facelib.LandmarksExtractor(nnlib.keras) - self.e.__enter__() - if self.device_vram >= 2: - self.second_pass_e = facelib.S3FDExtractor() - self.second_pass_e.__enter__() - else: - self.second_pass_e = None - - elif self.type == 'fanseg': - nnlib.import_all (device_config) - self.e = facelib.FANSegmentator(256, FaceType.toString(FaceType.FULL) ) - self.e.__enter__() - - elif self.type == 'final': - pass - - #override - def on_finalize(self): - if self.e is not None: - self.e.__exit__() - - #override - def process_data(self, data): - filename_path = Path( data.filename ) - - filename_path_str = str(filename_path) - if self.cached_image[0] == filename_path_str: - image = self.cached_image[1] #cached image for manual extractor - else: - image = cv2_imread( filename_path_str ) - - if image is None: - self.log_err ( 'Failed to extract %s, reason: cv2_imread() fail.' % ( str(filename_path) ) ) - return data - - image_shape = image.shape - if len(image_shape) == 2: - h, w = image.shape - image = image[:,:,np.newaxis] - ch = 1 - else: - h, w, ch = image.shape - - if ch == 1: - image = np.repeat (image, 3, -1) - elif ch == 4: - image = image[:,:,0:3] - - wm, hm = w % 2, h % 2 - if wm + hm != 0: #fix odd image - image = image[0:h-hm,0:w-wm,:] - self.cached_image = ( filename_path_str, image ) - - src_dflimg = None - h, w, ch = image.shape - if h == w: - #extracting from already extracted jpg image? 
- if filename_path.suffix == '.png': - src_dflimg = DFLPNG.load ( str(filename_path) ) - if filename_path.suffix == '.jpg': - src_dflimg = DFLJPG.load ( str(filename_path) ) - - if 'rects' in self.type: - if min(w,h) < 128: - self.log_err ( 'Image is too small %s : [%d, %d]' % ( str(filename_path), w, h ) ) - data.rects = [] - else: - for rot in ([0, 90, 270, 180]): - data.rects_rotation = rot - if rot == 0: - rotated_image = image - elif rot == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif rot == 180: - rotated_image = image[::-1,::-1,:] - elif rot == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - rects = data.rects = self.e.extract (rotated_image, is_bgr=True) - if len(rects) != 0: - break - - return data - - elif self.type == 'landmarks': - - if data.rects_rotation == 0: - rotated_image = image - elif data.rects_rotation == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif data.rects_rotation == 180: - rotated_image = image[::-1,::-1,:] - elif data.rects_rotation == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - data.landmarks = self.e.extract (rotated_image, data.rects, self.second_pass_e if (src_dflimg is None and data.landmarks_accurate) else None, is_bgr=True) - if data.rects_rotation != 0: - for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)): - new_rect, new_lmrks = rect, lmrks - (l,t,r,b) = rect - if data.rects_rotation == 90: - new_rect = ( t, h-l, b, h-r) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 180: - if lmrks is not None: - new_rect = ( w-l, h-t, w-r, h-b) - new_lmrks = lmrks.copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 270: - new_rect = ( w-b, l, w-t, r ) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - data.rects[i], data.landmarks[i] = new_rect, new_lmrks - - return data - - elif self.type == 'final': - data.final_output_files = [] - rects = data.rects - landmarks = data.landmarks - - if self.debug_dir is not None: - debug_output_file = str( Path(self.debug_dir) / (filename_path.stem+'.jpg') ) - debug_image = image.copy() - - if src_dflimg is not None and len(rects) != 1: - #if re-extracting from dflimg and more than 1 or zero faces detected - dont process and just copy it - print("src_dflimg is not None and len(rects) != 1", str(filename_path) ) - output_file = str(self.final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - data.final_output_files.append (output_file) - else: - face_idx = 0 - for rect, image_landmarks in zip( rects, landmarks ): - if src_dflimg is not None and face_idx > 1: - #cannot extract more than 1 face from dflimg - break - - if image_landmarks is None: - continue - - rect = np.array(rect) - - if self.face_type == FaceType.MARK_ONLY: - face_image = image - face_image_landmarks = image_landmarks - else: - image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) - face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) - face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) - - landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True) - - 
rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]])) - landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] ) - - if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area - continue - - if self.debug_dir is not None: - LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) - - if src_dflimg is not None and filename_path.suffix == '.jpg': - #if extracting from dflimg and jpg copy it in order not to lose quality - output_file = str(self.final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - else: - output_file = '{}_{}{}'.format(str(self.final_output_path / filename_path.stem), str(face_idx), '.jpg') - cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) - - DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), - landmarks=face_image_landmarks.tolist(), - source_filename=filename_path.name, - source_rect=rect, - source_landmarks=image_landmarks.tolist(), - image_to_face_mat=image_to_face_mat, - pitch_yaw_roll=data.pitch_yaw_roll - ) - - data.final_output_files.append (output_file) - face_idx += 1 - data.faces_detected = face_idx - - if self.debug_dir is not None: - cv2_imwrite(debug_output_file, debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) - - return data - - elif self.type == 'fanseg': - if src_dflimg is not None: - fanseg_mask = self.e.extract( image / 255.0 ) - src_dflimg.embed_and_set( filename_path_str, - fanseg_mask=fanseg_mask, - #fanseg_mask_ver=FANSegmentator.VERSION, - ) - - #overridable - def get_data_name (self, data): - #return string identificator of your data - return data.filename - - #override - def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None): - self.input_data = input_data - self.type = type - self.image_size = image_size - self.face_type = face_type - self.debug_dir = debug_dir - self.final_output_path = final_output_path - self.manual = manual - self.manual_window_size = manual_window_size - self.result = [] - - self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) - - no_response_time_sec = 60 if not self.manual else 999999 - super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) - - #override - def on_check_run(self): - if len(self.devices) == 0: - io.log_err("No devices found to start subprocessor.") - return False - return True - - #override - def on_clients_initialized(self): - if self.manual == True: - self.wnd_name = 'Manual pass' - io.named_window(self.wnd_name) - io.capture_mouse(self.wnd_name) - io.capture_keys(self.wnd_name) - - self.cache_original_image = (None, None) - self.cache_image = (None, None) - self.cache_text_lines_img = (None, None) - self.hide_help = False - self.landmarks_accurate = True - - self.landmarks = None - self.x = 0 - self.y = 0 - self.rect_size = 100 - self.rect_locked = False - self.extract_needed = True - - io.progress_bar (None, len (self.input_data)) - - #override - def on_clients_finalized(self): - if self.manual == True: - io.destroy_all_windows() - - io.progress_bar_close() - - #override - def process_info_generator(self): - base_dict = {'type' : self.type, - 'image_size': self.image_size, - 'face_type': 
self.face_type, - 'debug_dir': self.debug_dir, - 'final_output_dir': str(self.final_output_path)} - - - for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices: - client_dict = base_dict.copy() - client_dict['device_idx'] = device_idx - client_dict['device_name'] = device_name - client_dict['device_type'] = device_type - yield client_dict['device_name'], {}, client_dict - - #override - def get_data(self, host_dict): - if not self.manual: - if len (self.input_data) > 0: - return self.input_data.pop(0) - else: - need_remark_face = False - redraw_needed = False - while len (self.input_data) > 0: - data = self.input_data[0] - filename, data_rects, data_landmarks = data.filename, data.rects, data.landmarks - is_frame_done = False - - if need_remark_face: # need remark image from input data that already has a marked face? - need_remark_face = False - if len(data_rects) != 0: # If there was already a face then lock the rectangle to it until the mouse is clicked - self.rect = data_rects.pop() - self.landmarks = data_landmarks.pop() - data_rects.clear() - data_landmarks.clear() - redraw_needed = True - self.rect_locked = True - self.rect_size = ( self.rect[2] - self.rect[0] ) / 2 - self.x = ( self.rect[0] + self.rect[2] ) / 2 - self.y = ( self.rect[1] + self.rect[3] ) / 2 - - if len(data_rects) == 0: - if self.cache_original_image[0] == filename: - self.original_image = self.cache_original_image[1] - else: - self.original_image = cv2_imread( filename ) - self.cache_original_image = (filename, self.original_image ) - - (h,w,c) = self.original_image.shape - self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / ( h * (16.0/9.0) ) - - if self.cache_image[0] == (h,w,c) + (self.view_scale,filename): - self.image = self.cache_image[1] - else: - self.image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) - self.cache_image = ( (h,w,c) + (self.view_scale,filename), self.image ) - - (h,w,c) = self.image.shape - - sh = (0,0, w, min(100, h) ) - if self.cache_text_lines_img[0] == sh: - self.text_lines_img = self.cache_text_lines_img[1] - else: - self.text_lines_img = (imagelib.get_draw_text_lines ( self.image, sh, - [ '[Mouse click] - lock/unlock selection', - '[Mouse wheel] - change rect', - '[Enter] / [Space] - confirm / skip frame', - '[,] [.]- prev frame, next frame. 
[Q] - skip remaining frames', - '[a] - accuracy on/off (more fps)', - '[h] - hide this help' - ], (1, 1, 1) )*255).astype(np.uint8) - - self.cache_text_lines_img = (sh, self.text_lines_img) - - while True: - io.process_messages(0.0001) - - new_x = self.x - new_y = self.y - new_rect_size = self.rect_size - - mouse_events = io.get_mouse_events(self.wnd_name) - for ev in mouse_events: - (x, y, ev, flags) = ev - if ev == io.EVENT_MOUSEWHEEL and not self.rect_locked: - mod = 1 if flags > 0 else -1 - diff = 1 if new_rect_size <= 40 else np.clip(new_rect_size / 10, 1, 10) - new_rect_size = max (5, new_rect_size + diff*mod) - elif ev == io.EVENT_LBUTTONDOWN: - self.rect_locked = not self.rect_locked - self.extract_needed = True - elif not self.rect_locked: - new_x = np.clip (x, 0, w-1) / self.view_scale - new_y = np.clip (y, 0, h-1) / self.view_scale - - key_events = io.get_key_events(self.wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - - if key == ord('\r') or key == ord('\n'): - #confirm frame - is_frame_done = True - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - break - elif key == ord(' '): - #confirm skip frame - is_frame_done = True - break - elif key == ord(',') and len(self.result) > 0: - #go prev frame - - if self.rect_locked: - self.rect_locked = False - # Only save the face if the rect is still locked - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - - - self.input_data.insert(0, self.result.pop() ) - io.progress_bar_inc(-1) - need_remark_face = True - - break - elif key == ord('.'): - #go next frame - - if self.rect_locked: - self.rect_locked = False - # Only save the face if the rect is still locked - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - - need_remark_face = True - is_frame_done = True - break - elif key == ord('q'): - #skip remaining - - if self.rect_locked: - self.rect_locked = False - data_rects.append (self.rect) - data_landmarks.append (self.landmarks) - - while len(self.input_data) > 0: - self.result.append( self.input_data.pop(0) ) - io.progress_bar_inc(1) - - break - - elif key == ord('h'): - self.hide_help = not self.hide_help - break - elif key == ord('a'): - self.landmarks_accurate = not self.landmarks_accurate - break - - if self.x != new_x or \ - self.y != new_y or \ - self.rect_size != new_rect_size or \ - self.extract_needed or \ - redraw_needed: - self.x = new_x - self.y = new_y - self.rect_size = new_rect_size - self.rect = ( int(self.x-self.rect_size), - int(self.y-self.rect_size), - int(self.x+self.rect_size), - int(self.y+self.rect_size) ) - - if redraw_needed: - redraw_needed = False - return ExtractSubprocessor.Data (filename, landmarks_accurate=self.landmarks_accurate) - else: - return ExtractSubprocessor.Data (filename, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) - - else: - is_frame_done = True - - if is_frame_done: - self.result.append ( data ) - self.input_data.pop(0) - io.progress_bar_inc(1) - self.extract_needed = True - self.rect_locked = False - - return None - - #override - def on_data_return (self, host_dict, data): - if not self.manual: - self.input_data.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - if self.manual == True: - filename, landmarks = result.filename, result.landmarks - if len(landmarks) != 0: - self.landmarks = landmarks[0] - - (h,w,c) = self.image.shape - - if not self.hide_help: - image = cv2.addWeighted 
(self.image,1.0,self.text_lines_img,1.0,0) - else: - image = self.image.copy() - - view_rect = (np.array(self.rect) * self.view_scale).astype(np.int).tolist() - view_landmarks = (np.array(self.landmarks) * self.view_scale).astype(np.int).tolist() - - if self.rect_size <= 40: - scaled_rect_size = h // 3 if w > h else w // 3 - - p1 = (self.x - self.rect_size, self.y - self.rect_size) - p2 = (self.x + self.rect_size, self.y - self.rect_size) - p3 = (self.x - self.rect_size, self.y + self.rect_size) - - wh = h if h < w else w - np1 = (w / 2 - wh / 4, h / 2 - wh / 4) - np2 = (w / 2 + wh / 4, h / 2 - wh / 4) - np3 = (w / 2 - wh / 4, h / 2 + wh / 4) - - mat = cv2.getAffineTransform( np.float32([p1,p2,p3])*self.view_scale, np.float32([np1,np2,np3]) ) - image = cv2.warpAffine(image, mat,(w,h) ) - view_landmarks = LandmarksProcessor.transform_points (view_landmarks, mat) - - landmarks_color = (255,255,0) if self.rect_locked else (0,255,0) - LandmarksProcessor.draw_rect_landmarks (image, view_rect, view_landmarks, self.image_size, self.face_type, landmarks_color=landmarks_color) - self.extract_needed = False - - io.show_image (self.wnd_name, image) - else: - self.result.append ( result ) - io.progress_bar_inc(1) - - - - #override - def get_result(self): - return self.result - - @staticmethod - def get_devices_for_config (manual, type, multi_gpu, cpu_only): - backend = nnlib.device.backend - if 'cpu' in backend: - cpu_only = True - - if 'rects' in type or type == 'landmarks' or type == 'fanseg': - if not cpu_only and type == 'rects-mt' and backend == "plaidML": #plaidML works with MT very slowly - cpu_only = True - - if not cpu_only: - devices = [] - if not manual and multi_gpu: - devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) - - if len(devices) == 0: - idx = nnlib.device.getBestValidDeviceIdx() - if idx != -1: - devices = [idx] - - if len(devices) == 0: - cpu_only = True - - result = [] - for idx in devices: - dev_name = nnlib.device.getDeviceName(idx) - dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) - - if not manual and (type == 'rects-dlib' or type == 'rects-mt' ): - for i in range ( int (max (1, dev_vram / 2) ) ): - result += [ (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) ] - else: - result += [ (idx, 'GPU', dev_name, dev_vram) ] - - return result - - if cpu_only: - if manual: - return [ (0, 'CPU', 'CPU', 0 ) ] - else: - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] - - elif type == 'final': - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range(min(8, multiprocessing.cpu_count())) ] - -class DeletedFilesSearcherSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.debug_paths_stems = client_dict['debug_paths_stems'] - return None - - #override - def process_data(self, data): - input_path_stem = Path(data[0]).stem - return any ( [ input_path_stem == d_stem for d_stem in self.debug_paths_stems] ) - - #override - def get_data_name (self, data): - #return string identificator of your data - return data[0] - - #override - def __init__(self, input_paths, debug_paths ): - self.input_paths = input_paths - self.debug_paths_stems = [ Path(d).stem for d in debug_paths] - self.result = [] - super().__init__('DeletedFilesSearcherSubprocessor', DeletedFilesSearcherSubprocessor.Cli, 60) - - #override - def process_info_generator(self): - for i in range(min(multiprocessing.cpu_count(), 8)): - yield 'CPU%d' % (i), {}, {'debug_paths_stems' : self.debug_paths_stems} - - 
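An aside on the manual-pass zoom seen in on_result above: when the locked selection rect is 40 px or smaller, three of its corners are mapped onto a centered square so the face fills the view before landmarks are drawn. A standalone sketch of that three-point affine mapping, with toy sizes and only numpy/OpenCV assumed:

    import cv2
    import numpy as np

    h, w = 720, 1280                  # view size
    x, y, rect_size = 300, 200, 30    # small selection (<= 40 px triggers the zoom)

    # three corners of the selection square
    p1 = (x - rect_size, y - rect_size)
    p2 = (x + rect_size, y - rect_size)
    p3 = (x - rect_size, y + rect_size)

    # matching corners of a centered square half the short side wide
    wh = min(h, w)
    np1 = (w / 2 - wh / 4, h / 2 - wh / 4)
    np2 = (w / 2 + wh / 4, h / 2 - wh / 4)
    np3 = (w / 2 - wh / 4, h / 2 + wh / 4)

    # an affine map is fully determined by three point correspondences
    mat = cv2.getAffineTransform(np.float32([p1, p2, p3]), np.float32([np1, np2, np3]))
    image = np.zeros((h, w, 3), np.uint8)
    zoomed = cv2.warpAffine(image, mat, (w, h))   # selection now fills the view center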
#override - def on_clients_initialized(self): - io.progress_bar ("Searching deleted files", len (self.input_paths)) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def get_data(self, host_dict): - if len (self.input_paths) > 0: - return [self.input_paths.pop(0)] - return None - - #override - def on_data_return (self, host_dict, data): - self.input_paths.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - if result == False: - self.result.append( data[0] ) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result - - -#currently unused -def extract_fanseg(input_dir, device_args={} ): - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - - input_path = Path(input_dir) - if not input_path.exists(): - raise ValueError('Input directory not found. Please ensure it exists.') - - paths_to_extract = [] - for filename in Path_utils.get_image_paths(input_path) : - filepath = Path(filename) - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is not None: - paths_to_extract.append (filepath) - - paths_to_extract_len = len(paths_to_extract) - if paths_to_extract_len > 0: - io.log_info ("Performing extract fanseg for %d files..." % (paths_to_extract_len) ) - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in paths_to_extract ], 'fanseg', multi_gpu=multi_gpu, cpu_only=cpu_only).run() - -def extract_umd_csv(input_file_csv, - image_size=256, - face_type='full_face', - device_args={} ): - - #extract faces from umdfaces.io dataset csv file with pitch,yaw,roll info. - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - face_type = FaceType.fromString(face_type) - - input_file_csv_path = Path(input_file_csv) - if not input_file_csv_path.exists(): - raise ValueError('input_file_csv not found. Please ensure it exists.') - - input_file_csv_root_path = input_file_csv_path.parent - output_path = input_file_csv_path.parent / ('aligned_' + input_file_csv_path.name) - - io.log_info("Output dir is %s." % (str(output_path)) ) - - if output_path.exists(): - output_images_paths = Path_utils.get_image_paths(output_path) - if len(output_images_paths) > 0: - io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) - for filename in output_images_paths: - Path(filename).unlink() - else: - output_path.mkdir(parents=True, exist_ok=True) - - try: - with open( str(input_file_csv_path), 'r') as f: - csv_file = f.read() - except Exception as e: - io.log_err("Unable to open or read file " + str(input_file_csv_path) + ": " + str(e) ) - return - - strings = csv_file.split('\n') - keys = strings[0].split(',') - keys_len = len(keys) - csv_data = [] - for i in range(1, len(strings)): - values = strings[i].split(',') - if keys_len != len(values): - io.log_err("Wrong string in csv file, skipping.") - continue - - csv_data += [ { keys[n] : values[n] for n in range(keys_len) } ] - - data = [] - for d in csv_data: - filename = input_file_csv_root_path / d['FILE'] - - pitch, yaw, roll = float(d['PITCH']), float(d['YAW']), float(d['ROLL']) - if pitch < -90 or pitch > 90 or yaw < -90 or yaw > 90 or roll < -90 or roll > 90: - continue - - pitch_yaw_roll = pitch/90.0, yaw/90.0, roll/90.0 - - x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT']) - - data += [ ExtractSubprocessor.Data(filename=filename, rects=[ [x,y,x+w,y+h] ], pitch_yaw_roll=pitch_yaw_roll) ] - - images_found = len(data) - faces_detected = 0 - if len(data) > 0: - io.log_info ("Performing 2nd pass from csv file...") - data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run() - - io.log_info ('Performing 3rd pass...') - data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() - faces_detected += sum([d.faces_detected for d in data]) - - - io.log_info ('-------------------------') - io.log_info ('Images found: %d' % (images_found) ) - io.log_info ('Faces detected: %d' % (faces_detected) ) - io.log_info ('-------------------------') - -def main(input_dir, - output_dir, - debug_dir=None, - detector='mt', - manual_fix=False, - manual_output_debug_fix=False, - manual_window_size=1368, - image_size=256, - face_type='full_face', - device_args={}): - - input_path = Path(input_dir) - output_path = Path(output_dir) - face_type = FaceType.fromString(face_type) - - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - - if not input_path.exists(): - raise ValueError('Input directory not found. Please ensure it exists.') - - if output_path.exists(): - if not manual_output_debug_fix and input_path != output_path: - output_images_paths = Path_utils.get_image_paths(output_path) - if len(output_images_paths) > 0: - io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) - for filename in output_images_paths: - Path(filename).unlink() - else: - output_path.mkdir(parents=True, exist_ok=True) - - if manual_output_debug_fix: - if debug_dir is None: - raise ValueError('debug-dir must be specified') - detector = 'manual' - io.log_info('Performing re-extract frames which were deleted from _debug directory.') - - input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) - if debug_dir is not None: - debug_output_path = Path(debug_dir) - - if manual_output_debug_fix: - if not debug_output_path.exists(): - raise ValueError("%s not found " % ( str(debug_output_path) )) - - input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, Path_utils.get_image_paths(debug_output_path) ).run() - input_path_image_paths = sorted (input_path_image_paths) - io.log_info('Found %d images.' % (len(input_path_image_paths))) - else: - if debug_output_path.exists(): - for filename in Path_utils.get_image_paths(debug_output_path): - Path(filename).unlink() - else: - debug_output_path.mkdir(parents=True, exist_ok=True) - - images_found = len(input_path_image_paths) - faces_detected = 0 - if images_found != 0: - if detector == 'manual': - io.log_info ('Performing manual extract...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run() - else: - io.log_info ('Performing 1st pass...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() - - io.log_info ('Performing 2nd pass...') - data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() - - io.log_info ('Performing 3rd pass...') - data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() - faces_detected += sum([d.faces_detected for d in data]) - - if manual_fix: - if all ( np.array ( [ d.faces_detected > 0 for d in data] ) == True ): - io.log_info ('All faces are detected, manual fix not needed.') - else: - fix_data = [ ExtractSubprocessor.Data(d.filename) for d in data if d.faces_detected == 0 ] - io.log_info ('Performing manual fix for %d images...' 
% (len(fix_data)) )
-                fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run()
-                fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
-                faces_detected += sum([d.faces_detected for d in fix_data])
-
-
-    io.log_info ('-------------------------')
-    io.log_info ('Images found: %d' % (images_found) )
-    io.log_info ('Faces detected: %d' % (faces_detected) )
-    io.log_info ('-------------------------')
+import traceback
+import os
+import sys
+import time
+import multiprocessing
+import shutil
+from pathlib import Path
+import numpy as np
+import mathlib
+import imagelib
+import cv2
+from utils import Path_utils
+from utils.DFLPNG import DFLPNG
+from utils.DFLJPG import DFLJPG
+from utils.cv2_utils import *
+import facelib
+from facelib import FaceType
+from facelib import LandmarksProcessor
+from facelib import FANSegmentator
+from nnlib import nnlib
+from joblib import Subprocessor
+from interact import interact as io
+
+class ExtractSubprocessor(Subprocessor):
+    class Data(object):
+        def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, final_output_files = None):
+            self.filename = filename
+            self.rects = rects or []
+            self.rects_rotation = 0
+            self.landmarks_accurate = landmarks_accurate
+            self.landmarks = landmarks or []
+            self.pitch_yaw_roll = pitch_yaw_roll
+            self.final_output_files = final_output_files or []
+            self.faces_detected = 0
+
+    class Cli(Subprocessor.Cli):
+
+        #override
+        def on_initialize(self, client_dict):
+            self.type = client_dict['type']
+            self.image_size = client_dict['image_size']
+            self.face_type = client_dict['face_type']
+            self.device_idx = client_dict['device_idx']
+            self.cpu_only = client_dict['device_type'] == 'CPU'
+            self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None
+            self.debug_dir = client_dict['debug_dir']
+
+            self.cached_image = (None, None)
+
+            self.e = None
+            device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True)
+            self.device_vram = device_config.gpu_vram_gb[0]
+
+            intro_str = 'Running on %s.' % (client_dict['device_name'])
+            if not self.cpu_only and self.device_vram <= 2:
+                intro_str += " Recommended to close all programs using this device."
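Note that ExtractSubprocessor.Data above defaults rects, landmarks and final_output_files to None and substitutes fresh lists inside __init__. A minimal illustration of the shared-mutable-default pitfall this sidesteps (hypothetical classes, not from this patch):

    class Bad:
        def __init__(self, rects=[]):     # one list object shared by every call
            self.rects = rects

    a, b = Bad(), Bad()
    a.rects.append('face')
    print(b.rects)                        # ['face'] - state leaked between instances

    class Good:
        def __init__(self, rects=None):
            self.rects = rects or []      # fresh list per instance, as Data does

    c, d = Good(), Good()
    c.rects.append('face')
    print(d.rects)                        # []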
+ + self.log_info (intro_str) + + if 'rects' in self.type: + if self.type == 'rects-mt': + nnlib.import_all (device_config) + self.e = facelib.MTCExtractor() + elif self.type == 'rects-dlib': + nnlib.import_dlib (device_config) + self.e = facelib.DLIBExtractor(nnlib.dlib) + elif self.type == 'rects-s3fd': + nnlib.import_all (device_config) + self.e = facelib.S3FDExtractor() + else: + raise ValueError ("Wrong type.") + + if self.e is not None: + self.e.__enter__() + + elif self.type == 'landmarks': + nnlib.import_all (device_config) + self.e = facelib.LandmarksExtractor(nnlib.keras) + self.e.__enter__() + if self.device_vram >= 2: + self.second_pass_e = facelib.S3FDExtractor() + self.second_pass_e.__enter__() + else: + self.second_pass_e = None + + elif self.type == 'fanseg': + nnlib.import_all (device_config) + self.e = facelib.FANSegmentator(256, FaceType.toString(FaceType.FULL) ) + self.e.__enter__() + + elif self.type == 'final': + pass + + #override + def on_finalize(self): + if self.e is not None: + self.e.__exit__() + + #override + def process_data(self, data): + filename_path = Path( data.filename ) + + filename_path_str = str(filename_path) + if self.cached_image[0] == filename_path_str: + image = self.cached_image[1] #cached image for manual extractor + else: + image = cv2_imread( filename_path_str ) + + if image is None: + self.log_err ( 'Failed to extract %s, reason: cv2_imread() fail.' % ( str(filename_path) ) ) + return data + + image_shape = image.shape + if len(image_shape) == 2: + h, w = image.shape + image = image[:,:,np.newaxis] + ch = 1 + else: + h, w, ch = image.shape + + if ch == 1: + image = np.repeat (image, 3, -1) + elif ch == 4: + image = image[:,:,0:3] + + wm, hm = w % 2, h % 2 + if wm + hm != 0: #fix odd image + image = image[0:h-hm,0:w-wm,:] + self.cached_image = ( filename_path_str, image ) + + src_dflimg = None + h, w, ch = image.shape + if h == w: + #extracting from already extracted jpg image? 
+ if filename_path.suffix == '.png': + src_dflimg = DFLPNG.load ( str(filename_path) ) + if filename_path.suffix == '.jpg': + src_dflimg = DFLJPG.load ( str(filename_path) ) + + if 'rects' in self.type: + if min(w,h) < 128: + self.log_err ( 'Image is too small %s : [%d, %d]' % ( str(filename_path), w, h ) ) + data.rects = [] + else: + for rot in ([0, 90, 270, 180]): + data.rects_rotation = rot + if rot == 0: + rotated_image = image + elif rot == 90: + rotated_image = image.swapaxes( 0,1 )[:,::-1,:] + elif rot == 180: + rotated_image = image[::-1,::-1,:] + elif rot == 270: + rotated_image = image.swapaxes( 0,1 )[::-1,:,:] + + rects = data.rects = self.e.extract (rotated_image, is_bgr=True) + if len(rects) != 0: + break + + return data + + elif self.type == 'landmarks': + + if data.rects_rotation == 0: + rotated_image = image + elif data.rects_rotation == 90: + rotated_image = image.swapaxes( 0,1 )[:,::-1,:] + elif data.rects_rotation == 180: + rotated_image = image[::-1,::-1,:] + elif data.rects_rotation == 270: + rotated_image = image.swapaxes( 0,1 )[::-1,:,:] + + data.landmarks = self.e.extract (rotated_image, data.rects, self.second_pass_e if (src_dflimg is None and data.landmarks_accurate) else None, is_bgr=True) + if data.rects_rotation != 0: + for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)): + new_rect, new_lmrks = rect, lmrks + (l,t,r,b) = rect + if data.rects_rotation == 90: + new_rect = ( t, h-l, b, h-r) + if lmrks is not None: + new_lmrks = lmrks[:,::-1].copy() + new_lmrks[:,1] = h - new_lmrks[:,1] + elif data.rects_rotation == 180: + if lmrks is not None: + new_rect = ( w-l, h-t, w-r, h-b) + new_lmrks = lmrks.copy() + new_lmrks[:,0] = w - new_lmrks[:,0] + new_lmrks[:,1] = h - new_lmrks[:,1] + elif data.rects_rotation == 270: + new_rect = ( w-b, l, w-t, r ) + if lmrks is not None: + new_lmrks = lmrks[:,::-1].copy() + new_lmrks[:,0] = w - new_lmrks[:,0] + data.rects[i], data.landmarks[i] = new_rect, new_lmrks + + return data + + elif self.type == 'final': + data.final_output_files = [] + rects = data.rects + landmarks = data.landmarks + + if self.debug_dir is not None: + debug_output_file = str( Path(self.debug_dir) / (filename_path.stem+'.jpg') ) + debug_image = image.copy() + + if src_dflimg is not None and len(rects) != 1: + #if re-extracting from dflimg and more than 1 or zero faces detected - dont process and just copy it + print("src_dflimg is not None and len(rects) != 1", str(filename_path) ) + output_file = str(self.final_output_path / filename_path.name) + if str(filename_path) != str(output_file): + shutil.copy ( str(filename_path), str(output_file) ) + data.final_output_files.append (output_file) + else: + face_idx = 0 + for rect, image_landmarks in zip( rects, landmarks ): + if src_dflimg is not None and face_idx > 1: + #cannot extract more than 1 face from dflimg + break + + if image_landmarks is None: + continue + + rect = np.array(rect) + + if self.face_type == FaceType.MARK_ONLY: + face_image = image + face_image_landmarks = image_landmarks + else: + image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) + face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) + face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) + + landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True) + + 
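The next hunk rejects detections whose umeyama-aligned landmark bounding box dwarfs the detector rect. A rough sketch of that comparison, assuming mathlib.polygon_area is a standard shoelace implementation (its internals are not shown in this patch):

    import numpy as np

    def polygon_area(x, y):
        # shoelace formula over the vertex coordinate arrays (assumed mathlib behaviour)
        return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))

    rect = np.array([10, 10, 110, 110])   # l, t, r, b from the detector
    rect_area = polygon_area(rect[[0, 2, 2, 0]].astype(float), rect[[1, 1, 3, 3]].astype(float))

    landmarks_bbox = np.array([[0, 0], [0, 460], [460, 460], [460, 0]], dtype=float)
    landmarks_area = polygon_area(landmarks_bbox[:, 0], landmarks_bbox[:, 1])

    if landmarks_area > 4 * rect_area:    # 211600 > 40000, so this face is dropped
        print('skip: landmark area exceeds 4x the detector rect area')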
rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]])) + landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] ) + + if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area + continue + + if self.debug_dir is not None: + LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) + + if src_dflimg is not None and filename_path.suffix == '.jpg': + #if extracting from dflimg and jpg copy it in order not to lose quality + output_file = str(self.final_output_path / filename_path.name) + if str(filename_path) != str(output_file): + shutil.copy ( str(filename_path), str(output_file) ) + else: + output_file = '{}_{}{}'.format(str(self.final_output_path / filename_path.stem), str(face_idx), '.jpg') + cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) + + DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), + landmarks=face_image_landmarks.tolist(), + source_filename=filename_path.name, + source_rect=rect, + source_landmarks=image_landmarks.tolist(), + image_to_face_mat=image_to_face_mat, + pitch_yaw_roll=data.pitch_yaw_roll + ) + + data.final_output_files.append (output_file) + face_idx += 1 + data.faces_detected = face_idx + + if self.debug_dir is not None: + cv2_imwrite(debug_output_file, debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) + + return data + + elif self.type == 'fanseg': + if src_dflimg is not None: + fanseg_mask = self.e.extract( image / 255.0 ) + src_dflimg.embed_and_set( filename_path_str, + fanseg_mask=fanseg_mask, + #fanseg_mask_ver=FANSegmentator.VERSION, + ) + + #overridable + def get_data_name (self, data): + #return string identificator of your data + return data.filename + + #override + def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None): + self.input_data = input_data + self.type = type + self.image_size = image_size + self.face_type = face_type + self.debug_dir = debug_dir + self.final_output_path = final_output_path + self.manual = manual + self.manual_window_size = manual_window_size + self.result = [] + + self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) + + no_response_time_sec = 60 if not self.manual else 999999 + super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) + + #override + def on_check_run(self): + if len(self.devices) == 0: + io.log_err("No devices found to start subprocessor.") + return False + return True + + #override + def on_clients_initialized(self): + if self.manual == True: + self.wnd_name = 'Manual pass' + io.named_window(self.wnd_name) + io.capture_mouse(self.wnd_name) + io.capture_keys(self.wnd_name) + + self.cache_original_image = (None, None) + self.cache_image = (None, None) + self.cache_text_lines_img = (None, None) + self.hide_help = False + self.landmarks_accurate = True + + self.landmarks = None + self.x = 0 + self.y = 0 + self.rect_size = 100 + self.rect_locked = False + self.extract_needed = True + + io.progress_bar (None, len (self.input_data)) + + #override + def on_clients_finalized(self): + if self.manual == True: + io.destroy_all_windows() + + io.progress_bar_close() + + #override + def process_info_generator(self): + base_dict = {'type' : self.type, + 'image_size': self.image_size, + 'face_type': 
self.face_type, + 'debug_dir': self.debug_dir, + 'final_output_dir': str(self.final_output_path)} + + + for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices: + client_dict = base_dict.copy() + client_dict['device_idx'] = device_idx + client_dict['device_name'] = device_name + client_dict['device_type'] = device_type + yield client_dict['device_name'], {}, client_dict + + #override + def get_data(self, host_dict): + if not self.manual: + if len (self.input_data) > 0: + return self.input_data.pop(0) + else: + need_remark_face = False + redraw_needed = False + while len (self.input_data) > 0: + data = self.input_data[0] + filename, data_rects, data_landmarks = data.filename, data.rects, data.landmarks + is_frame_done = False + + if need_remark_face: # need remark image from input data that already has a marked face? + need_remark_face = False + if len(data_rects) != 0: # If there was already a face then lock the rectangle to it until the mouse is clicked + self.rect = data_rects.pop() + self.landmarks = data_landmarks.pop() + data_rects.clear() + data_landmarks.clear() + redraw_needed = True + self.rect_locked = True + self.rect_size = ( self.rect[2] - self.rect[0] ) / 2 + self.x = ( self.rect[0] + self.rect[2] ) / 2 + self.y = ( self.rect[1] + self.rect[3] ) / 2 + + if len(data_rects) == 0: + if self.cache_original_image[0] == filename: + self.original_image = self.cache_original_image[1] + else: + self.original_image = cv2_imread( filename ) + self.cache_original_image = (filename, self.original_image ) + + (h,w,c) = self.original_image.shape + self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / ( h * (16.0/9.0) ) + + if self.cache_image[0] == (h,w,c) + (self.view_scale,filename): + self.image = self.cache_image[1] + else: + self.image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) + self.cache_image = ( (h,w,c) + (self.view_scale,filename), self.image ) + + (h,w,c) = self.image.shape + + sh = (0,0, w, min(100, h) ) + if self.cache_text_lines_img[0] == sh: + self.text_lines_img = self.cache_text_lines_img[1] + else: + self.text_lines_img = (imagelib.get_draw_text_lines ( self.image, sh, + [ '[Mouse click] - lock/unlock selection', + '[Mouse wheel] - change rect', + '[Enter] / [Space] - confirm / skip frame', + '[,] [.]- prev frame, next frame. 
[Q] - skip remaining frames', + '[a] - accuracy on/off (more fps)', + '[h] - hide this help' + ], (1, 1, 1) )*255).astype(np.uint8) + + self.cache_text_lines_img = (sh, self.text_lines_img) + + while True: + io.process_messages(0.0001) + + new_x = self.x + new_y = self.y + new_rect_size = self.rect_size + + mouse_events = io.get_mouse_events(self.wnd_name) + for ev in mouse_events: + (x, y, ev, flags) = ev + if ev == io.EVENT_MOUSEWHEEL and not self.rect_locked: + mod = 1 if flags > 0 else -1 + diff = 1 if new_rect_size <= 40 else np.clip(new_rect_size / 10, 1, 10) + new_rect_size = max (5, new_rect_size + diff*mod) + elif ev == io.EVENT_LBUTTONDOWN: + self.rect_locked = not self.rect_locked + self.extract_needed = True + elif not self.rect_locked: + new_x = np.clip (x, 0, w-1) / self.view_scale + new_y = np.clip (y, 0, h-1) / self.view_scale + + key_events = io.get_key_events(self.wnd_name) + key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) + + if key == ord('\r') or key == ord('\n'): + #confirm frame + is_frame_done = True + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + break + elif key == ord(' '): + #confirm skip frame + is_frame_done = True + break + elif key == ord(',') and len(self.result) > 0: + #go prev frame + + if self.rect_locked: + self.rect_locked = False + # Only save the face if the rect is still locked + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + + + self.input_data.insert(0, self.result.pop() ) + io.progress_bar_inc(-1) + need_remark_face = True + + break + elif key == ord('.'): + #go next frame + + if self.rect_locked: + self.rect_locked = False + # Only save the face if the rect is still locked + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + + need_remark_face = True + is_frame_done = True + break + elif key == ord('q'): + #skip remaining + + if self.rect_locked: + self.rect_locked = False + data_rects.append (self.rect) + data_landmarks.append (self.landmarks) + + while len(self.input_data) > 0: + self.result.append( self.input_data.pop(0) ) + io.progress_bar_inc(1) + + break + + elif key == ord('h'): + self.hide_help = not self.hide_help + break + elif key == ord('a'): + self.landmarks_accurate = not self.landmarks_accurate + break + + if self.x != new_x or \ + self.y != new_y or \ + self.rect_size != new_rect_size or \ + self.extract_needed or \ + redraw_needed: + self.x = new_x + self.y = new_y + self.rect_size = new_rect_size + self.rect = ( int(self.x-self.rect_size), + int(self.y-self.rect_size), + int(self.x+self.rect_size), + int(self.y+self.rect_size) ) + + if redraw_needed: + redraw_needed = False + return ExtractSubprocessor.Data (filename, landmarks_accurate=self.landmarks_accurate) + else: + return ExtractSubprocessor.Data (filename, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) + + else: + is_frame_done = True + + if is_frame_done: + self.result.append ( data ) + self.input_data.pop(0) + io.progress_bar_inc(1) + self.extract_needed = True + self.rect_locked = False + + return None + + #override + def on_data_return (self, host_dict, data): + if not self.manual: + self.input_data.insert(0, data) + + #override + def on_result (self, host_dict, data, result): + if self.manual == True: + filename, landmarks = result.filename, result.landmarks + if len(landmarks) != 0: + self.landmarks = landmarks[0] + + (h,w,c) = self.image.shape + + if not self.hide_help: + image = cv2.addWeighted 
(self.image,1.0,self.text_lines_img,1.0,0) + else: + image = self.image.copy() + + view_rect = (np.array(self.rect) * self.view_scale).astype(np.int).tolist() + view_landmarks = (np.array(self.landmarks) * self.view_scale).astype(np.int).tolist() + + if self.rect_size <= 40: + scaled_rect_size = h // 3 if w > h else w // 3 + + p1 = (self.x - self.rect_size, self.y - self.rect_size) + p2 = (self.x + self.rect_size, self.y - self.rect_size) + p3 = (self.x - self.rect_size, self.y + self.rect_size) + + wh = h if h < w else w + np1 = (w / 2 - wh / 4, h / 2 - wh / 4) + np2 = (w / 2 + wh / 4, h / 2 - wh / 4) + np3 = (w / 2 - wh / 4, h / 2 + wh / 4) + + mat = cv2.getAffineTransform( np.float32([p1,p2,p3])*self.view_scale, np.float32([np1,np2,np3]) ) + image = cv2.warpAffine(image, mat,(w,h) ) + view_landmarks = LandmarksProcessor.transform_points (view_landmarks, mat) + + landmarks_color = (255,255,0) if self.rect_locked else (0,255,0) + LandmarksProcessor.draw_rect_landmarks (image, view_rect, view_landmarks, self.image_size, self.face_type, landmarks_color=landmarks_color) + self.extract_needed = False + + io.show_image (self.wnd_name, image) + else: + self.result.append ( result ) + io.progress_bar_inc(1) + + + + #override + def get_result(self): + return self.result + + @staticmethod + def get_devices_for_config (manual, type, multi_gpu, cpu_only): + backend = nnlib.device.backend + if 'cpu' in backend: + cpu_only = True + + if 'rects' in type or type == 'landmarks' or type == 'fanseg': + if not cpu_only and type == 'rects-mt' and backend == "plaidML": #plaidML works with MT very slowly + cpu_only = True + + if not cpu_only: + devices = [] + if not manual and multi_gpu: + devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) + + if len(devices) == 0: + idx = nnlib.device.getBestValidDeviceIdx() + if idx != -1: + devices = [idx] + + if len(devices) == 0: + cpu_only = True + + result = [] + for idx in devices: + dev_name = nnlib.device.getDeviceName(idx) + dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) + + if not manual and (type == 'rects-dlib' or type == 'rects-mt' ): + for i in range ( int (max (1, dev_vram / 2) ) ): + result += [ (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) ] + else: + result += [ (idx, 'GPU', dev_name, dev_vram) ] + + return result + + if cpu_only: + if manual: + return [ (0, 'CPU', 'CPU', 0 ) ] + else: + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] + + elif type == 'final': + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range(min(8, multiprocessing.cpu_count())) ] + +class DeletedFilesSearcherSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.debug_paths_stems = client_dict['debug_paths_stems'] + return None + + #override + def process_data(self, data): + input_path_stem = Path(data[0]).stem + return any ( [ input_path_stem == d_stem for d_stem in self.debug_paths_stems] ) + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[0] + + #override + def __init__(self, input_paths, debug_paths ): + self.input_paths = input_paths + self.debug_paths_stems = [ Path(d).stem for d in debug_paths] + self.result = [] + super().__init__('DeletedFilesSearcherSubprocessor', DeletedFilesSearcherSubprocessor.Cli, 60) + + #override + def process_info_generator(self): + for i in range(min(multiprocessing.cpu_count(), 8)): + yield 'CPU%d' % (i), {}, {'debug_paths_stems' : self.debug_paths_stems} + + 
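DeletedFilesSearcherSubprocessor.Cli above answers each query with a linear scan over every debug stem. For long frame sequences a set makes that membership test constant-time; an illustrative alternative (same result, not part of this patch):

    from pathlib import Path

    debug_paths = ['/debug/0001.jpg', '/debug/0003.jpg']    # example data
    debug_stems = { Path(p).stem for p in debug_paths }     # build once in on_initialize

    def still_in_debug(input_path):
        # equivalent of Cli.process_data: True when a _debug counterpart survives
        return Path(input_path).stem in debug_stems

    print(still_in_debug('/input/0001.png'))   # True  - frame was kept
    print(still_in_debug('/input/0002.png'))   # False - deleted, so re-extract it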
#override + def on_clients_initialized(self): + io.progress_bar ("Searching deleted files", len (self.input_paths)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def get_data(self, host_dict): + if len (self.input_paths) > 0: + return [self.input_paths.pop(0)] + return None + + #override + def on_data_return (self, host_dict, data): + self.input_paths.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + if result == False: + self.result.append( data[0] ) + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result + + +#currently unused +def extract_fanseg(input_dir, device_args={} ): + multi_gpu = device_args.get('multi_gpu', False) + cpu_only = device_args.get('cpu_only', False) + + input_path = Path(input_dir) + if not input_path.exists(): + raise ValueError('Input directory not found. Please ensure it exists.') + + paths_to_extract = [] + for filename in Path_utils.get_image_paths(input_path) : + filepath = Path(filename) + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is not None: + paths_to_extract.append (filepath) + + paths_to_extract_len = len(paths_to_extract) + if paths_to_extract_len > 0: + io.log_info ("Performing extract fanseg for %d files..." % (paths_to_extract_len) ) + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in paths_to_extract ], 'fanseg', multi_gpu=multi_gpu, cpu_only=cpu_only).run() + +def extract_umd_csv(input_file_csv, + image_size=256, + face_type='full_face', + device_args={} ): + + #extract faces from umdfaces.io dataset csv file with pitch,yaw,roll info. + multi_gpu = device_args.get('multi_gpu', False) + cpu_only = device_args.get('cpu_only', False) + face_type = FaceType.fromString(face_type) + + input_file_csv_path = Path(input_file_csv) + if not input_file_csv_path.exists(): + raise ValueError('input_file_csv not found. Please ensure it exists.') + + input_file_csv_root_path = input_file_csv_path.parent + output_path = input_file_csv_path.parent / ('aligned_' + input_file_csv_path.name) + + io.log_info("Output dir is %s." % (str(output_path)) ) + + if output_path.exists(): + output_images_paths = Path_utils.get_image_paths(output_path) + if len(output_images_paths) > 0: + io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) + for filename in output_images_paths: + Path(filename).unlink() + else: + output_path.mkdir(parents=True, exist_ok=True) + + try: + with open( str(input_file_csv_path), 'r') as f: + csv_file = f.read() + except Exception as e: + io.log_err("Unable to open or read file " + str(input_file_csv_path) + ": " + str(e) ) + return + + strings = csv_file.split('\n') + keys = strings[0].split(',') + keys_len = len(keys) + csv_data = [] + for i in range(1, len(strings)): + values = strings[i].split(',') + if keys_len != len(values): + io.log_err("Wrong string in csv file, skipping.") + continue + + csv_data += [ { keys[n] : values[n] for n in range(keys_len) } ] + + data = [] + for d in csv_data: + filename = input_file_csv_root_path / d['FILE'] + + pitch, yaw, roll = float(d['PITCH']), float(d['YAW']), float(d['ROLL']) + if pitch < -90 or pitch > 90 or yaw < -90 or yaw > 90 or roll < -90 or roll > 90: + continue + + pitch_yaw_roll = pitch/90.0, yaw/90.0, roll/90.0 + + x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT']) + + data += [ ExtractSubprocessor.Data(filename=filename, rects=[ [x,y,x+w,y+h] ], pitch_yaw_roll=pitch_yaw_roll) ] + + images_found = len(data) + faces_detected = 0 + if len(data) > 0: + io.log_info ("Performing 2nd pass from csv file...") + data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run() + + io.log_info ('Performing 3rd pass...') + data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + faces_detected += sum([d.faces_detected for d in data]) + + + io.log_info ('-------------------------') + io.log_info ('Images found: %d' % (images_found) ) + io.log_info ('Faces detected: %d' % (faces_detected) ) + io.log_info ('-------------------------') + +def main(input_dir, + output_dir, + debug_dir=None, + detector='mt', + manual_fix=False, + manual_output_debug_fix=False, + manual_window_size=1368, + image_size=256, + face_type='full_face', + device_args={}): + + input_path = Path(input_dir) + output_path = Path(output_dir) + face_type = FaceType.fromString(face_type) + + multi_gpu = device_args.get('multi_gpu', False) + cpu_only = device_args.get('cpu_only', False) + + if not input_path.exists(): + raise ValueError('Input directory not found. Please ensure it exists.') + + if output_path.exists(): + if not manual_output_debug_fix and input_path != output_path: + output_images_paths = Path_utils.get_image_paths(output_path) + if len(output_images_paths) > 0: + io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) + for filename in output_images_paths: + Path(filename).unlink() + else: + output_path.mkdir(parents=True, exist_ok=True) + + if manual_output_debug_fix: + if debug_dir is None: + raise ValueError('debug-dir must be specified') + detector = 'manual' + io.log_info('Performing re-extract frames which were deleted from _debug directory.') + + input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) + if debug_dir is not None: + debug_output_path = Path(debug_dir) + + if manual_output_debug_fix: + if not debug_output_path.exists(): + raise ValueError("%s not found " % ( str(debug_output_path) )) + + input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, Path_utils.get_image_paths(debug_output_path) ).run() + input_path_image_paths = sorted (input_path_image_paths) + io.log_info('Found %d images.' % (len(input_path_image_paths))) + else: + if debug_output_path.exists(): + for filename in Path_utils.get_image_paths(debug_output_path): + Path(filename).unlink() + else: + debug_output_path.mkdir(parents=True, exist_ok=True) + + images_found = len(input_path_image_paths) + faces_detected = 0 + if images_found != 0: + if detector == 'manual': + io.log_info ('Performing manual extract...') + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run() + else: + io.log_info ('Performing 1st pass...') + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() + + io.log_info ('Performing 2nd pass...') + data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() + + io.log_info ('Performing 3rd pass...') + data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + faces_detected += sum([d.faces_detected for d in data]) + + if manual_fix: + if all ( np.array ( [ d.faces_detected > 0 for d in data] ) == True ): + io.log_info ('All faces are detected, manual fix not needed.') + else: + fix_data = [ ExtractSubprocessor.Data(d.filename) for d in data if d.faces_detected == 0 ] + io.log_info ('Performing manual fix for %d images...' 
% (len(fix_data)) )
+                fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run()
+                fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+                faces_detected += sum([d.faces_detected for d in fix_data])
+
+
+    io.log_info ('-------------------------')
+    io.log_info ('Images found: %d' % (images_found) )
+    io.log_info ('Faces detected: %d' % (faces_detected) )
+    io.log_info ('-------------------------')
diff --git a/mainscripts/MaskEditorTool.py b/mainscripts/MaskEditorTool.py
index 3f63375..1d9750f 100644
--- a/mainscripts/MaskEditorTool.py
+++ b/mainscripts/MaskEditorTool.py
@@ -1,556 +1,556 @@
-import os
-import sys
-import time
-import traceback
-from pathlib import Path
-
-import cv2
-import numpy as np
-import numpy.linalg as npl
-
-import imagelib
-from facelib import LandmarksProcessor
-from imagelib import IEPolys
-from interact import interact as io
-from utils import Path_utils
-from utils.cv2_utils import *
-from utils.DFLJPG import DFLJPG
-from utils.DFLPNG import DFLPNG
-
-class MaskEditor:
-    STATE_NONE=0
-    STATE_MASKING=1
-
-    def __init__(self, img, prev_images, next_images, mask=None, ie_polys=None, get_status_lines_func=None):
-        self.img = imagelib.normalize_channels (img,3)
-        h, w, c = img.shape
-
-        if h != w and w != 256:
-            #to support any square res, scale img,mask and ie_polys to 256, then scale ie_polys back on .get_ie_polys()
-            raise Exception ("MaskEditor does not support image size != 256x256")
-
-        ph, pw = h // 4, w // 4 #pad wh
-
-        self.prev_images = prev_images
-        self.next_images = next_images
-
-        if mask is not None:
-            self.mask = imagelib.normalize_channels (mask,3)
-        else:
-            self.mask = np.zeros ( (h,w,3) )
-        self.get_status_lines_func = get_status_lines_func
-
-        self.state_prop = self.STATE_NONE
-
-        self.w, self.h = w, h
-        self.pw, self.ph = pw, ph
-        self.pwh = np.array([self.pw, self.ph])
-        self.pwh2 = np.array([self.pw*2, self.ph*2])
-        self.sw, self.sh = w+pw*2, h+ph*2
-        self.prwh = 64 #preview wh
-
-        if ie_polys is None:
-            ie_polys = IEPolys()
-        self.ie_polys = ie_polys
-
-        self.polys_mask = None
-        self.preview_images = None
-
-        self.mouse_x = self.mouse_y = 9999
-        self.screen_status_block = None
-        self.screen_status_block_dirty = True
-        self.screen_changed = True
-
-    def set_state(self, state):
-        self.state = state
-
-    @property
-    def state(self):
-        return self.state_prop
-
-    @state.setter
-    def state(self, value):
-        self.state_prop = value
-        if value == self.STATE_MASKING:
-            self.ie_polys.dirty = True
-
-    def get_mask(self):
-        if self.ie_polys.switch_dirty():
-            self.screen_status_block_dirty = True
-            self.ie_mask = img = self.mask.copy()
-
-            self.ie_polys.overlay_mask(img)
-
-            return img
-        return self.ie_mask
-
-    def get_screen_overlay(self):
-        img = np.zeros ( (self.sh, self.sw, 3) )
-
-        if self.state == self.STATE_MASKING:
-            mouse_xy = self.mouse_xy.copy() + self.pwh
-            l = self.ie_polys.n_list()
-            if l.n > 0:
-                p = l.cur_point().copy() + self.pwh
-                color = (0,1,0) if l.type == 1 else (0,0,1)
-                cv2.line(img, tuple(p), tuple(mouse_xy), color )
-
-        return img
-
-    def undo_to_begin_point(self):
-        while not self.undo_point():
-            pass
-
-    def undo_point(self):
-        self.screen_changed = True
-        if self.state == self.STATE_NONE:
-            if self.ie_polys.n > 0:
-                self.state = self.STATE_MASKING
-
-        if self.state == self.STATE_MASKING:
-            if self.ie_polys.n_list().n_dec()
== 0 and \ - self.ie_polys.n_dec() == 0: - self.state = self.STATE_NONE - else: - return False - - return True - - def redo_to_end_point(self): - while not self.redo_point(): - pass - - def redo_point(self): - self.screen_changed = True - if self.state == self.STATE_NONE: - if self.ie_polys.n_max > 0: - self.state = self.STATE_MASKING - if self.ie_polys.n == 0: - self.ie_polys.n_inc() - - if self.state == self.STATE_MASKING: - while True: - l = self.ie_polys.n_list() - if l.n_inc() == l.n_max: - if self.ie_polys.n == self.ie_polys.n_max: - break - self.ie_polys.n_inc() - else: - return False - - return True - - def combine_screens(self, screens): - - screens_len = len(screens) - - new_screens = [] - for screen, padded_overlay in screens: - screen_img = np.zeros( (self.sh, self.sw, 3), dtype=np.float32 ) - - screen = imagelib.normalize_channels (screen, 3) - h,w,c = screen.shape - - screen_img[self.ph:-self.ph, self.pw:-self.pw, :] = screen - - if padded_overlay is not None: - screen_img = screen_img + padded_overlay - - screen_img = np.clip(screen_img*255, 0, 255).astype(np.uint8) - new_screens.append(screen_img) - - return np.concatenate (new_screens, axis=1) - - def get_screen_status_block(self, w, c): - if self.screen_status_block_dirty: - self.screen_status_block_dirty = False - lines = [ - 'Polys current/max = %d/%d' % (self.ie_polys.n, self.ie_polys.n_max), - ] - if self.get_status_lines_func is not None: - lines += self.get_status_lines_func() - - lines_count = len(lines) - - - h_line = 21 - h = lines_count * h_line - img = np.ones ( (h,w,c) ) * 0.1 - - for i in range(lines_count): - img[ i*h_line:(i+1)*h_line, 0:w] += \ - imagelib.get_text_image ( (h_line,w,c), lines[i], color=[0.8]*c ) - - self.screen_status_block = np.clip(img*255, 0, 255).astype(np.uint8) - - return self.screen_status_block - - def set_screen_status_block_dirty(self): - self.screen_status_block_dirty = True - - def set_screen_changed(self): - self.screen_changed = True - - def switch_screen_changed(self): - result = self.screen_changed - self.screen_changed = False - return result - - def make_screen(self): - screen_overlay = self.get_screen_overlay() - final_mask = self.get_mask() - - masked_img = self.img*final_mask*0.5 + self.img*(1-final_mask) - - pink = np.full ( (self.h, self.w, 3), (1,0,1) ) - pink_masked_img = self.img*final_mask + pink*(1-final_mask) - - - - - screens = [ (self.img, screen_overlay), - (masked_img, screen_overlay), - (pink_masked_img, screen_overlay), - ] - screens = self.combine_screens(screens) - - if self.preview_images is None: - sh,sw,sc = screens.shape - - prh, prw = self.prwh, self.prwh - - total_w = sum ([ img.shape[1] for (t,img) in self.prev_images ]) + \ - sum ([ img.shape[1] for (t,img) in self.next_images ]) - - total_images_len = len(self.prev_images) + len(self.next_images) - - max_hor_images_count = sw // prw - max_side_images_count = (max_hor_images_count - 1) // 2 - - prev_images = self.prev_images[-max_side_images_count:] - next_images = self.next_images[:max_side_images_count] - - border = 2 - - max_wh_bordered = (prw-border*2, prh-border*2) - - prev_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in prev_images ] - next_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in next_images ] - - for images in [prev_images, next_images]: - for i, (t, img) in enumerate(images): - new_img = np.zeros ( (prh,prw, sc) ) - new_img[border:-border,border:-border] = img - - if t == 2: - cv2.line 
(new_img, ( prw//2, int(prh//1.5) ), (int(prw/1.5), prh ) , (0,1,0), thickness=2 ) - cv2.line (new_img, ( int(prw/1.5), prh ), ( prw, prh // 2 ) , (0,1,0), thickness=2 ) - elif t == 1: - cv2.line (new_img, ( prw//2, prh//2 ), ( prw, prh ) , (0,0,1), thickness=2 ) - cv2.line (new_img, ( prw//2, prh ), ( prw, prh // 2 ) , (0,0,1), thickness=2 ) - - images[i] = new_img - - - preview_images = [] - if len(prev_images) > 0: - preview_images += [ np.concatenate (prev_images, axis=1) ] - - img = np.full ( (prh,prw, sc), (0,0,1), dtype=np.float ) - img[border:-border,border:-border] = cv2.resize( self.img, max_wh_bordered ) - - preview_images += [ img ] - - if len(next_images) > 0: - preview_images += [ np.concatenate (next_images, axis=1) ] - - preview_images = np.concatenate ( preview_images, axis=1 ) - - left_pad = sw // 2 - len(prev_images) * prw - prw // 2 - right_pad = sw // 2 - len(next_images) * prw - prw // 2 - - preview_images = np.concatenate ([np.zeros ( (preview_images.shape[0], left_pad, preview_images.shape[2]) ), - preview_images, - np.zeros ( (preview_images.shape[0], right_pad, preview_images.shape[2]) ) - ], axis=1) - self.preview_images = np.clip(preview_images * 255, 0, 255 ).astype(np.uint8) - - status_img = self.get_screen_status_block( screens.shape[1], screens.shape[2] ) - - result = np.concatenate ( [self.preview_images, screens, status_img], axis=0 ) - - return result - - def mask_finish(self, n_clip=True): - if self.state == self.STATE_MASKING: - self.screen_changed = True - if self.ie_polys.n_list().n <= 2: - self.ie_polys.n_dec() - self.state = self.STATE_NONE - if n_clip: - self.ie_polys.n_clip() - - def set_mouse_pos(self,x,y): - if self.preview_images is not None: - y -= self.preview_images.shape[0] - - mouse_x = x % (self.sw) - self.pw - mouse_y = y % (self.sh) - self.ph - - - - if mouse_x != self.mouse_x or mouse_y != self.mouse_y: - self.mouse_xy = np.array( [mouse_x, mouse_y] ) - self.mouse_x, self.mouse_y = self.mouse_xy - self.screen_changed = True - - def mask_point(self, type): - self.screen_changed = True - if self.state == self.STATE_MASKING and \ - self.ie_polys.n_list().type != type: - self.mask_finish() - - elif self.state == self.STATE_NONE: - self.state = self.STATE_MASKING - self.ie_polys.add(type) - - if self.state == self.STATE_MASKING: - self.ie_polys.n_list().add (self.mouse_x, self.mouse_y) - - def get_ie_polys(self): - return self.ie_polys - -def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None): - input_path = Path(input_dir) - - confirmed_path = Path(confirmed_dir) - skipped_path = Path(skipped_dir) - - if not input_path.exists(): - raise ValueError('Input directory not found. 
Please ensure it exists.') - - if not confirmed_path.exists(): - confirmed_path.mkdir(parents=True) - - if not skipped_path.exists(): - skipped_path.mkdir(parents=True) - - wnd_name = "MaskEditor tool" - io.named_window (wnd_name) - io.capture_mouse(wnd_name) - io.capture_keys(wnd_name) - - cached_images = {} - - image_paths = [ Path(x) for x in Path_utils.get_image_paths(input_path)] - done_paths = [] - done_images_types = {} - image_paths_total = len(image_paths) - - zoom_factor = 1.0 - preview_images_count = 9 - target_wh = 256 - - do_prev_count = 0 - do_save_move_count = 0 - do_save_count = 0 - do_skip_move_count = 0 - do_skip_count = 0 - - def jobs_count(): - return do_prev_count + do_save_move_count + do_save_count + do_skip_move_count + do_skip_count - - is_exit = False - while not is_exit: - - if len(image_paths) > 0: - filepath = image_paths.pop(0) - else: - filepath = None - - next_image_paths = image_paths[0:preview_images_count] - next_image_paths_names = [ path.name for path in next_image_paths ] - prev_image_paths = done_paths[-preview_images_count:] - prev_image_paths_names = [ path.name for path in prev_image_paths ] - - for key in list( cached_images.keys() ): - if key not in prev_image_paths_names and \ - key not in next_image_paths_names: - cached_images.pop(key) - - for paths in [prev_image_paths, next_image_paths]: - for path in paths: - if path.name not in cached_images: - cached_images[path.name] = cv2_imread(str(path)) / 255.0 - - if filepath is not None: - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - else: - lmrks = dflimg.get_landmarks() - ie_polys = dflimg.get_ie_polys() - fanseg_mask = dflimg.get_fanseg_mask() - - if filepath.name in cached_images: - img = cached_images[filepath.name] - else: - img = cached_images[filepath.name] = cv2_imread(str(filepath)) / 255.0 - - if fanseg_mask is not None: - mask = fanseg_mask - else: - mask = LandmarksProcessor.get_image_hull_mask( img.shape, lmrks) - else: - img = np.zeros ( (target_wh,target_wh,3) ) - mask = np.ones ( (target_wh,target_wh,3) ) - ie_polys = None - - def get_status_lines_func(): - return ['Progress: %d / %d . Current file: %s' % (len(done_paths), image_paths_total, str(filepath.name) if filepath is not None else "end" ), - '[Left mouse button] - mark include mask.', - '[Right mouse button] - mark exclude mask.', - '[Middle mouse button] - finish current poly.', - '[Mouse wheel] - undo/redo poly or point. [+ctrl] - undo to begin/redo to end', - '[q] - prev image. [w] - skip and move to %s. [e] - save and move to %s. ' % (skipped_path.name, confirmed_path.name), - '[z] - prev image. [x] - skip. [c] - save. 
', - 'hold [shift] - speed up the frame counter by 10.', - '[-/+] - window zoom [esc] - quit', - ] - - try: - ed = MaskEditor(img, - [ (done_images_types[name], cached_images[name]) for name in prev_image_paths_names ], - [ (0, cached_images[name]) for name in next_image_paths_names ], - mask, ie_polys, get_status_lines_func) - except Exception as e: - print(e) - continue - - next = False - while not next: - io.process_messages(0.005) - - if jobs_count() == 0: - for (x,y,ev,flags) in io.get_mouse_events(wnd_name): - x, y = int (x / zoom_factor), int(y / zoom_factor) - ed.set_mouse_pos(x, y) - if filepath is not None: - if ev == io.EVENT_LBUTTONDOWN: - ed.mask_point(1) - elif ev == io.EVENT_RBUTTONDOWN: - ed.mask_point(0) - elif ev == io.EVENT_MBUTTONDOWN: - ed.mask_finish() - elif ev == io.EVENT_MOUSEWHEEL: - if flags & 0x80000000 != 0: - if flags & 0x8 != 0: - ed.undo_to_begin_point() - else: - ed.undo_point() - else: - if flags & 0x8 != 0: - ed.redo_to_end_point() - else: - ed.redo_point() - - for key, chr_key, ctrl_pressed, alt_pressed, shift_pressed in io.get_key_events(wnd_name): - if chr_key == 'q' or chr_key == 'z': - do_prev_count = 1 if not shift_pressed else 10 - elif chr_key == '-': - zoom_factor = np.clip (zoom_factor-0.1, 0.1, 4.0) - ed.set_screen_changed() - elif chr_key == '+': - zoom_factor = np.clip (zoom_factor+0.1, 0.1, 4.0) - ed.set_screen_changed() - elif key == 27: #esc - is_exit = True - next = True - break - elif filepath is not None: - if chr_key == 'e': - do_save_move_count = 1 if not shift_pressed else 10 - elif chr_key == 'c': - do_save_count = 1 if not shift_pressed else 10 - elif chr_key == 'w': - do_skip_move_count = 1 if not shift_pressed else 10 - elif chr_key == 'x': - do_skip_count = 1 if not shift_pressed else 10 - - if do_prev_count > 0: - do_prev_count -= 1 - if len(done_paths) > 0: - if filepath is not None: - image_paths.insert(0, filepath) - - filepath = done_paths.pop(-1) - done_images_types[filepath.name] = 0 - - if filepath.parent != input_path: - new_filename_path = input_path / filepath.name - filepath.rename ( new_filename_path ) - image_paths.insert(0, new_filename_path) - else: - image_paths.insert(0, filepath) - - next = True - elif filepath is not None: - if do_save_move_count > 0: - do_save_move_count -= 1 - - ed.mask_finish() - dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) - - done_paths += [ confirmed_path / filepath.name ] - done_images_types[filepath.name] = 2 - filepath.rename(done_paths[-1]) - - next = True - elif do_save_count > 0: - do_save_count -= 1 - - ed.mask_finish() - dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) - - done_paths += [ filepath ] - done_images_types[filepath.name] = 2 - - next = True - elif do_skip_move_count > 0: - do_skip_move_count -= 1 - - done_paths += [ skipped_path / filepath.name ] - done_images_types[filepath.name] = 1 - filepath.rename(done_paths[-1]) - - next = True - elif do_skip_count > 0: - do_skip_count -= 1 - - done_paths += [ filepath ] - done_images_types[filepath.name] = 1 - - next = True - else: - do_save_move_count = do_save_count = do_skip_move_count = do_skip_count = 0 - - if jobs_count() == 0: - if ed.switch_screen_changed(): - screen = ed.make_screen() - if zoom_factor != 1.0: - h,w,c = screen.shape - screen = cv2.resize ( screen, ( int(w*zoom_factor), int(h*zoom_factor) ) ) - io.show_image (wnd_name, screen ) - - - io.process_messages(0.005) - - io.destroy_all_windows() - +import os +import sys +import time +import traceback +from pathlib import 
Path
+
+import cv2
+import numpy as np
+import numpy.linalg as npl
+
+import imagelib
+from facelib import LandmarksProcessor
+from imagelib import IEPolys
+from interact import interact as io
+from utils import Path_utils
+from utils.cv2_utils import *
+from utils.DFLJPG import DFLJPG
+from utils.DFLPNG import DFLPNG
+
+class MaskEditor:
+    STATE_NONE=0
+    STATE_MASKING=1
+
+    def __init__(self, img, prev_images, next_images, mask=None, ie_polys=None, get_status_lines_func=None):
+        self.img = imagelib.normalize_channels (img,3)
+        h, w, c = img.shape
+
+        if h != w or w != 256:
+            #to support any square res, scale img,mask and ie_polys to 256, then scale ie_polys back on .get_ie_polys()
+            raise Exception ("MaskEditor does not support image size != 256x256")
+
+        ph, pw = h // 4, w // 4 #pad wh
+
+        self.prev_images = prev_images
+        self.next_images = next_images
+
+        if mask is not None:
+            self.mask = imagelib.normalize_channels (mask,3)
+        else:
+            self.mask = np.zeros ( (h,w,3) )
+        self.get_status_lines_func = get_status_lines_func
+
+        self.state_prop = self.STATE_NONE
+
+        self.w, self.h = w, h
+        self.pw, self.ph = pw, ph
+        self.pwh = np.array([self.pw, self.ph])
+        self.pwh2 = np.array([self.pw*2, self.ph*2])
+        self.sw, self.sh = w+pw*2, h+ph*2
+        self.prwh = 64 #preview wh
+
+        if ie_polys is None:
+            ie_polys = IEPolys()
+        self.ie_polys = ie_polys
+
+        self.polys_mask = None
+        self.preview_images = None
+
+        self.mouse_x = self.mouse_y = 9999
+        self.screen_status_block = None
+        self.screen_status_block_dirty = True
+        self.screen_changed = True
+
+    def set_state(self, state):
+        self.state = state
+
+    @property
+    def state(self):
+        return self.state_prop
+
+    @state.setter
+    def state(self, value):
+        self.state_prop = value
+        if value == self.STATE_MASKING:
+            self.ie_polys.dirty = True
+
+    def get_mask(self):
+        if self.ie_polys.switch_dirty():
+            self.screen_status_block_dirty = True
+            self.ie_mask = img = self.mask.copy()
+
+            self.ie_polys.overlay_mask(img)
+
+            return img
+        return self.ie_mask
+
+    def get_screen_overlay(self):
+        img = np.zeros ( (self.sh, self.sw, 3) )
+
+        if self.state == self.STATE_MASKING:
+            mouse_xy = self.mouse_xy.copy() + self.pwh
+            l = self.ie_polys.n_list()
+            if l.n > 0:
+                p = l.cur_point().copy() + self.pwh
+                color = (0,1,0) if l.type == 1 else (0,0,1)
+                cv2.line(img, tuple(p), tuple(mouse_xy), color )
+
+        return img
+
+    def undo_to_begin_point(self):
+        while not self.undo_point():
+            pass
+
+    def undo_point(self):
+        self.screen_changed = True
+        if self.state == self.STATE_NONE:
+            if self.ie_polys.n > 0:
+                self.state = self.STATE_MASKING
+
+        if self.state == self.STATE_MASKING:
+            if self.ie_polys.n_list().n_dec() == 0 and \
+               self.ie_polys.n_dec() == 0:
+                self.state = self.STATE_NONE
+            else:
+                return False
+
+        return True
+
+    def redo_to_end_point(self):
+        while not self.redo_point():
+            pass
+
+    def redo_point(self):
+        self.screen_changed = True
+        if self.state == self.STATE_NONE:
+            if self.ie_polys.n_max > 0:
+                self.state = self.STATE_MASKING
+                if self.ie_polys.n == 0:
+                    self.ie_polys.n_inc()
+
+        if self.state == self.STATE_MASKING:
+            while True:
+                l = self.ie_polys.n_list()
+                if l.n_inc() == l.n_max:
+                    if self.ie_polys.n == self.ie_polys.n_max:
+                        break
+                    self.ie_polys.n_inc()
+                else:
+                    return False
+
+        return True
+
+    def combine_screens(self, screens):
+
+        screens_len = len(screens)
+
+        new_screens = []
+        for screen, padded_overlay in screens:
+            screen_img = np.zeros( (self.sh, self.sw, 3), dtype=np.float32 )
+
+            screen = imagelib.normalize_channels (screen, 3)
+            h,w,c = 
screen.shape + + screen_img[self.ph:-self.ph, self.pw:-self.pw, :] = screen + + if padded_overlay is not None: + screen_img = screen_img + padded_overlay + + screen_img = np.clip(screen_img*255, 0, 255).astype(np.uint8) + new_screens.append(screen_img) + + return np.concatenate (new_screens, axis=1) + + def get_screen_status_block(self, w, c): + if self.screen_status_block_dirty: + self.screen_status_block_dirty = False + lines = [ + 'Polys current/max = %d/%d' % (self.ie_polys.n, self.ie_polys.n_max), + ] + if self.get_status_lines_func is not None: + lines += self.get_status_lines_func() + + lines_count = len(lines) + + + h_line = 21 + h = lines_count * h_line + img = np.ones ( (h,w,c) ) * 0.1 + + for i in range(lines_count): + img[ i*h_line:(i+1)*h_line, 0:w] += \ + imagelib.get_text_image ( (h_line,w,c), lines[i], color=[0.8]*c ) + + self.screen_status_block = np.clip(img*255, 0, 255).astype(np.uint8) + + return self.screen_status_block + + def set_screen_status_block_dirty(self): + self.screen_status_block_dirty = True + + def set_screen_changed(self): + self.screen_changed = True + + def switch_screen_changed(self): + result = self.screen_changed + self.screen_changed = False + return result + + def make_screen(self): + screen_overlay = self.get_screen_overlay() + final_mask = self.get_mask() + + masked_img = self.img*final_mask*0.5 + self.img*(1-final_mask) + + pink = np.full ( (self.h, self.w, 3), (1,0,1) ) + pink_masked_img = self.img*final_mask + pink*(1-final_mask) + + + + + screens = [ (self.img, screen_overlay), + (masked_img, screen_overlay), + (pink_masked_img, screen_overlay), + ] + screens = self.combine_screens(screens) + + if self.preview_images is None: + sh,sw,sc = screens.shape + + prh, prw = self.prwh, self.prwh + + total_w = sum ([ img.shape[1] for (t,img) in self.prev_images ]) + \ + sum ([ img.shape[1] for (t,img) in self.next_images ]) + + total_images_len = len(self.prev_images) + len(self.next_images) + + max_hor_images_count = sw // prw + max_side_images_count = (max_hor_images_count - 1) // 2 + + prev_images = self.prev_images[-max_side_images_count:] + next_images = self.next_images[:max_side_images_count] + + border = 2 + + max_wh_bordered = (prw-border*2, prh-border*2) + + prev_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in prev_images ] + next_images = [ (t, cv2.resize( imagelib.normalize_channels(img, 3), max_wh_bordered )) for t,img in next_images ] + + for images in [prev_images, next_images]: + for i, (t, img) in enumerate(images): + new_img = np.zeros ( (prh,prw, sc) ) + new_img[border:-border,border:-border] = img + + if t == 2: + cv2.line (new_img, ( prw//2, int(prh//1.5) ), (int(prw/1.5), prh ) , (0,1,0), thickness=2 ) + cv2.line (new_img, ( int(prw/1.5), prh ), ( prw, prh // 2 ) , (0,1,0), thickness=2 ) + elif t == 1: + cv2.line (new_img, ( prw//2, prh//2 ), ( prw, prh ) , (0,0,1), thickness=2 ) + cv2.line (new_img, ( prw//2, prh ), ( prw, prh // 2 ) , (0,0,1), thickness=2 ) + + images[i] = new_img + + + preview_images = [] + if len(prev_images) > 0: + preview_images += [ np.concatenate (prev_images, axis=1) ] + + img = np.full ( (prh,prw, sc), (0,0,1), dtype=np.float ) + img[border:-border,border:-border] = cv2.resize( self.img, max_wh_bordered ) + + preview_images += [ img ] + + if len(next_images) > 0: + preview_images += [ np.concatenate (next_images, axis=1) ] + + preview_images = np.concatenate ( preview_images, axis=1 ) + + left_pad = sw // 2 - len(prev_images) * prw - prw // 2 + right_pad = 
sw // 2 - len(next_images) * prw - prw // 2
+
+            preview_images = np.concatenate ([np.zeros ( (preview_images.shape[0], left_pad, preview_images.shape[2]) ),
+                                              preview_images,
+                                              np.zeros ( (preview_images.shape[0], right_pad, preview_images.shape[2]) )
+                                             ], axis=1)
+            self.preview_images = np.clip(preview_images * 255, 0, 255 ).astype(np.uint8)
+
+        status_img = self.get_screen_status_block( screens.shape[1], screens.shape[2] )
+
+        result = np.concatenate ( [self.preview_images, screens, status_img], axis=0 )
+
+        return result
+
+    def mask_finish(self, n_clip=True):
+        if self.state == self.STATE_MASKING:
+            self.screen_changed = True
+            if self.ie_polys.n_list().n <= 2:
+                self.ie_polys.n_dec()
+            self.state = self.STATE_NONE
+            if n_clip:
+                self.ie_polys.n_clip()
+
+    def set_mouse_pos(self,x,y):
+        if self.preview_images is not None:
+            y -= self.preview_images.shape[0]
+
+        mouse_x = x % (self.sw) - self.pw
+        mouse_y = y % (self.sh) - self.ph
+
+        if mouse_x != self.mouse_x or mouse_y != self.mouse_y:
+            self.mouse_xy = np.array( [mouse_x, mouse_y] )
+            self.mouse_x, self.mouse_y = self.mouse_xy
+            self.screen_changed = True
+
+    def mask_point(self, type):
+        self.screen_changed = True
+        if self.state == self.STATE_MASKING and \
+           self.ie_polys.n_list().type != type:
+            self.mask_finish()
+
+        elif self.state == self.STATE_NONE:
+            self.state = self.STATE_MASKING
+            self.ie_polys.add(type)
+
+        if self.state == self.STATE_MASKING:
+            self.ie_polys.n_list().add (self.mouse_x, self.mouse_y)
+
+    def get_ie_polys(self):
+        return self.ie_polys
+
+def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None):
+    input_path = Path(input_dir)
+
+    if confirmed_dir is None or skipped_dir is None:
+        raise ValueError('Both confirmed_dir and skipped_dir must be specified.')
+
+    confirmed_path = Path(confirmed_dir)
+    skipped_path = Path(skipped_dir)
+
+    if not input_path.exists():
+        raise ValueError('Input directory not found. 
Please ensure it exists.') + + if not confirmed_path.exists(): + confirmed_path.mkdir(parents=True) + + if not skipped_path.exists(): + skipped_path.mkdir(parents=True) + + wnd_name = "MaskEditor tool" + io.named_window (wnd_name) + io.capture_mouse(wnd_name) + io.capture_keys(wnd_name) + + cached_images = {} + + image_paths = [ Path(x) for x in Path_utils.get_image_paths(input_path)] + done_paths = [] + done_images_types = {} + image_paths_total = len(image_paths) + + zoom_factor = 1.0 + preview_images_count = 9 + target_wh = 256 + + do_prev_count = 0 + do_save_move_count = 0 + do_save_count = 0 + do_skip_move_count = 0 + do_skip_count = 0 + + def jobs_count(): + return do_prev_count + do_save_move_count + do_save_count + do_skip_move_count + do_skip_count + + is_exit = False + while not is_exit: + + if len(image_paths) > 0: + filepath = image_paths.pop(0) + else: + filepath = None + + next_image_paths = image_paths[0:preview_images_count] + next_image_paths_names = [ path.name for path in next_image_paths ] + prev_image_paths = done_paths[-preview_images_count:] + prev_image_paths_names = [ path.name for path in prev_image_paths ] + + for key in list( cached_images.keys() ): + if key not in prev_image_paths_names and \ + key not in next_image_paths_names: + cached_images.pop(key) + + for paths in [prev_image_paths, next_image_paths]: + for path in paths: + if path.name not in cached_images: + cached_images[path.name] = cv2_imread(str(path)) / 255.0 + + if filepath is not None: + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + else: + lmrks = dflimg.get_landmarks() + ie_polys = dflimg.get_ie_polys() + fanseg_mask = dflimg.get_fanseg_mask() + + if filepath.name in cached_images: + img = cached_images[filepath.name] + else: + img = cached_images[filepath.name] = cv2_imread(str(filepath)) / 255.0 + + if fanseg_mask is not None: + mask = fanseg_mask + else: + mask = LandmarksProcessor.get_image_hull_mask( img.shape, lmrks) + else: + img = np.zeros ( (target_wh,target_wh,3) ) + mask = np.ones ( (target_wh,target_wh,3) ) + ie_polys = None + + def get_status_lines_func(): + return ['Progress: %d / %d . Current file: %s' % (len(done_paths), image_paths_total, str(filepath.name) if filepath is not None else "end" ), + '[Left mouse button] - mark include mask.', + '[Right mouse button] - mark exclude mask.', + '[Middle mouse button] - finish current poly.', + '[Mouse wheel] - undo/redo poly or point. [+ctrl] - undo to begin/redo to end', + '[q] - prev image. [w] - skip and move to %s. [e] - save and move to %s. ' % (skipped_path.name, confirmed_path.name), + '[z] - prev image. [x] - skip. [c] - save. 
', + 'hold [shift] - speed up the frame counter by 10.', + '[-/+] - window zoom [esc] - quit', + ] + + try: + ed = MaskEditor(img, + [ (done_images_types[name], cached_images[name]) for name in prev_image_paths_names ], + [ (0, cached_images[name]) for name in next_image_paths_names ], + mask, ie_polys, get_status_lines_func) + except Exception as e: + print(e) + continue + + next = False + while not next: + io.process_messages(0.005) + + if jobs_count() == 0: + for (x,y,ev,flags) in io.get_mouse_events(wnd_name): + x, y = int (x / zoom_factor), int(y / zoom_factor) + ed.set_mouse_pos(x, y) + if filepath is not None: + if ev == io.EVENT_LBUTTONDOWN: + ed.mask_point(1) + elif ev == io.EVENT_RBUTTONDOWN: + ed.mask_point(0) + elif ev == io.EVENT_MBUTTONDOWN: + ed.mask_finish() + elif ev == io.EVENT_MOUSEWHEEL: + if flags & 0x80000000 != 0: + if flags & 0x8 != 0: + ed.undo_to_begin_point() + else: + ed.undo_point() + else: + if flags & 0x8 != 0: + ed.redo_to_end_point() + else: + ed.redo_point() + + for key, chr_key, ctrl_pressed, alt_pressed, shift_pressed in io.get_key_events(wnd_name): + if chr_key == 'q' or chr_key == 'z': + do_prev_count = 1 if not shift_pressed else 10 + elif chr_key == '-': + zoom_factor = np.clip (zoom_factor-0.1, 0.1, 4.0) + ed.set_screen_changed() + elif chr_key == '+': + zoom_factor = np.clip (zoom_factor+0.1, 0.1, 4.0) + ed.set_screen_changed() + elif key == 27: #esc + is_exit = True + next = True + break + elif filepath is not None: + if chr_key == 'e': + do_save_move_count = 1 if not shift_pressed else 10 + elif chr_key == 'c': + do_save_count = 1 if not shift_pressed else 10 + elif chr_key == 'w': + do_skip_move_count = 1 if not shift_pressed else 10 + elif chr_key == 'x': + do_skip_count = 1 if not shift_pressed else 10 + + if do_prev_count > 0: + do_prev_count -= 1 + if len(done_paths) > 0: + if filepath is not None: + image_paths.insert(0, filepath) + + filepath = done_paths.pop(-1) + done_images_types[filepath.name] = 0 + + if filepath.parent != input_path: + new_filename_path = input_path / filepath.name + filepath.rename ( new_filename_path ) + image_paths.insert(0, new_filename_path) + else: + image_paths.insert(0, filepath) + + next = True + elif filepath is not None: + if do_save_move_count > 0: + do_save_move_count -= 1 + + ed.mask_finish() + dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) + + done_paths += [ confirmed_path / filepath.name ] + done_images_types[filepath.name] = 2 + filepath.rename(done_paths[-1]) + + next = True + elif do_save_count > 0: + do_save_count -= 1 + + ed.mask_finish() + dflimg.embed_and_set (str(filepath), ie_polys=ed.get_ie_polys() ) + + done_paths += [ filepath ] + done_images_types[filepath.name] = 2 + + next = True + elif do_skip_move_count > 0: + do_skip_move_count -= 1 + + done_paths += [ skipped_path / filepath.name ] + done_images_types[filepath.name] = 1 + filepath.rename(done_paths[-1]) + + next = True + elif do_skip_count > 0: + do_skip_count -= 1 + + done_paths += [ filepath ] + done_images_types[filepath.name] = 1 + + next = True + else: + do_save_move_count = do_save_count = do_skip_move_count = do_skip_count = 0 + + if jobs_count() == 0: + if ed.switch_screen_changed(): + screen = ed.make_screen() + if zoom_factor != 1.0: + h,w,c = screen.shape + screen = cv2.resize ( screen, ( int(w*zoom_factor), int(h*zoom_factor) ) ) + io.show_image (wnd_name, screen ) + + + io.process_messages(0.005) + + io.destroy_all_windows() + diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py index 
f83ab70..7ca5ab2 100644 --- a/mainscripts/Sorter.py +++ b/mainscripts/Sorter.py @@ -1,803 +1,803 @@ -import os -import sys -import operator -import numpy as np -import cv2 -from shutil import copyfile -from pathlib import Path -from utils import Path_utils -from utils.DFLPNG import DFLPNG -from utils.DFLJPG import DFLJPG -from utils.cv2_utils import * -from facelib import LandmarksProcessor -from joblib import Subprocessor -import multiprocessing -from interact import interact as io -from imagelib import estimate_sharpness - -class BlurEstimatorSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.log_info('Running on %s.' % (client_dict['device_name']) ) - - #override - def process_data(self, data): - filepath = Path( data[0] ) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is not None: - image = cv2_imread( str(filepath) ) - return [ str(filepath), estimate_sharpness(image) ] - else: - self.log_err ("%s is not a dfl image file" % (filepath.name) ) - return [ str(filepath), 0 ] - - #override - def get_data_name (self, data): - #return string identificator of your data - return data[0] - - #override - def __init__(self, input_data ): - self.input_data = input_data - self.img_list = [] - self.trash_img_list = [] - super().__init__('BlurEstimator', BlurEstimatorSubprocessor.Cli, 60) - - #override - def on_clients_initialized(self): - io.progress_bar ("", len (self.input_data)) - - #override - def on_clients_finalized(self): - io.progress_bar_close () - - #override - def process_info_generator(self): - for i in range(0, multiprocessing.cpu_count() ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - } - - #override - def get_data(self, host_dict): - if len (self.input_data) > 0: - return self.input_data.pop(0) - - return None - - #override - def on_data_return (self, host_dict, data): - self.input_data.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - if result[1] == 0: - self.trash_img_list.append ( result ) - else: - self.img_list.append ( result ) - - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.img_list, self.trash_img_list - - -def sort_by_blur(input_path): - io.log_info ("Sorting by blur...") - - img_list = [ (filename,[]) for filename in Path_utils.get_image_paths(input_path) ] - img_list, trash_img_list = BlurEstimatorSubprocessor (img_list).run() - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - return img_list, trash_img_list - -def sort_by_face(input_path): - io.log_info ("Sorting by face similarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks()] ) - - - img_list_len = len(img_list) - for i in io.progress_bar_generator ( range(0, img_list_len-1), "Sorting"): - min_score = float("inf") - j_min_score = i+1 - for j in range(i+1,len(img_list)): - - fl1 
= img_list[i][1] - fl2 = img_list[j][1] - score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - if score < min_score: - min_score = score - j_min_score = j - img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] - - return img_list, trash_img_list - -def sort_by_face_dissim(input_path): - - io.log_info ("Sorting by face dissimilarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks(), 0 ] ) - - img_list_len = len(img_list) - for i in io.progress_bar_generator( range(img_list_len-1), "Sorting"): - score_total = 0 - for j in range(i+1,len(img_list)): - if i == j: - continue - fl1 = img_list[i][1] - fl2 = img_list[j][1] - score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - img_list[i][2] = score_total - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) - - return img_list, trash_img_list - -def sort_by_face_yaw(input_path): - io.log_info ("Sorting by face yaw...") - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - pitch_yaw_roll = dflimg.get_pitch_yaw_roll() - if pitch_yaw_roll is not None: - pitch, yaw, roll = pitch_yaw_roll - else: - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) - - img_list.append( [str(filepath), yaw ] ) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - return img_list, trash_img_list - -def sort_by_face_pitch(input_path): - io.log_info ("Sorting by face pitch...") - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - pitch_yaw_roll = dflimg.get_pitch_yaw_roll() - if pitch_yaw_roll is not None: - pitch, yaw, roll = pitch_yaw_roll - else: - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) - - img_list.append( [str(filepath), pitch ] ) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - return img_list, trash_img_list - -class HistSsimSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - - #override - def process_data(self, data): - img_list = [] - for x in data: - img = cv2_imread(x) - img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), - cv2.calcHist([img], [1], None, [256], [0, 256]), - cv2.calcHist([img], [2], None, [256], [0, 256]) - ]) - - img_list_len = len(img_list) - for i in range(img_list_len-1): - min_score = float("inf") - j_min_score = i+1 - for j in range(i+1,len(img_list)): - score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ - cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ - cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) - if score < min_score: - min_score = score - j_min_score = j - img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] - - self.progress_bar_inc(1) - - return img_list - - #override - def get_data_name (self, data): - return "Bunch of images" - - #override - def __init__(self, img_list ): - self.img_list = img_list - self.img_list_len = len(img_list) - - slice_count = 20000 - sliced_count = self.img_list_len // slice_count - - if sliced_count > 12: - sliced_count = 11.9 - slice_count = int(self.img_list_len / sliced_count) - sliced_count = self.img_list_len // slice_count - - self.img_chunks_list = [ self.img_list[i*slice_count : (i+1)*slice_count] for i in range(sliced_count) ] + \ - [ self.img_list[sliced_count*slice_count:] ] - - self.result = [] - super().__init__('HistSsim', HistSsimSubprocessor.Cli, 0) - - #override - def process_info_generator(self): - for i in range( len(self.img_chunks_list) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } - #override - def on_clients_initialized(self): - io.progress_bar ("Sorting", len(self.img_list)) - io.progress_bar_inc(len(self.img_chunks_list)) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def get_data(self, host_dict): - if len (self.img_chunks_list) > 0: - return self.img_chunks_list.pop(0) - return None - - #override - def on_data_return (self, host_dict, data): - raise Exception("Fail to process data. Decrease number of images and try again.") - - #override - def on_result (self, host_dict, data, result): - self.result += result - return 0 - - #override - def get_result(self): - return self.result - -def sort_by_hist(input_path): - io.log_info ("Sorting by histogram similarity...") - img_list = HistSsimSubprocessor(Path_utils.get_image_paths(input_path)).run() - return img_list - -class HistDissimSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - self.img_list = client_dict['img_list'] - self.img_list_len = len(self.img_list) - - #override - def process_data(self, data): - i = data[0] - score_total = 0 - for j in range( 0, self.img_list_len): - if i == j: - continue - score_total += cv2.compareHist(self.img_list[i][1], self.img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) - - return score_total - - #override - def get_data_name (self, data): - #return string identificator of your data - return self.img_list[data[0]][0] - - #override - def __init__(self, img_list ): - self.img_list = img_list - self.img_list_range = [i for i in range(0, len(img_list) )] - self.result = [] - super().__init__('HistDissim', HistDissimSubprocessor.Cli, 60) - - #override - def on_clients_initialized(self): - io.progress_bar ("Sorting", len (self.img_list) ) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'img_list' : self.img_list - } - #override - def get_data(self, host_dict): - if len (self.img_list_range) > 0: - return [self.img_list_range.pop(0)] - - return None - - #override - def on_data_return (self, host_dict, data): - self.img_list_range.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - self.img_list[data[0]][2] = result - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.img_list - -def sort_by_hist_dissim(input_path): - io.log_info ("Sorting by histogram dissimilarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ([str(filepath)]) - continue - - image = cv2_imread(str(filepath)) - face_mask = LandmarksProcessor.get_image_hull_mask (image.shape, dflimg.get_landmarks()) - image = (image*face_mask).astype(np.uint8) - - img_list.append ([str(filepath), cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, [256], [0, 256]), 0 ]) - - img_list = HistDissimSubprocessor(img_list).run() - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) - - return img_list, trash_img_list - -def sort_by_brightness(input_path): - io.log_info ("Sorting by brightness...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list - -def sort_by_hue(input_path): - io.log_info ("Sorting by hue...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list - -def sort_by_black(input_path): - io.log_info ("Sorting by amount of black pixels...") - - img_list = [] - for x in io.progress_bar_generator( 
Path_utils.get_image_paths(input_path), "Loading"): - img = cv2_imread(x) - img_list.append ([x, img[(img == 0)].size ]) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=False) - - return img_list - -def sort_by_origname(input_path): - io.log_info ("Sort by original filename...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_source_filename()] ) - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(1)) - return img_list, trash_img_list - -def sort_by_oneface_in_image(input_path): - io.log_info ("Sort by one face in images...") - image_paths = Path_utils.get_image_paths(input_path) - a = np.array ([ ( int(x[0]), int(x[1]) ) \ - for x in [ Path(filepath).stem.split('_') for filepath in image_paths ] if len(x) == 2 - ]) - if len(a) > 0: - idxs = np.ndarray.flatten ( np.argwhere ( a[:,1] != 0 ) ) - idxs = np.unique ( a[idxs][:,0] ) - idxs = np.ndarray.flatten ( np.argwhere ( np.array([ x[0] in idxs for x in a ]) == True ) ) - if len(idxs) > 0: - io.log_info ("Found %d images." % (len(idxs)) ) - img_list = [ (path,) for i,path in enumerate(image_paths) if i not in idxs ] - trash_img_list = [ (image_paths[x],) for x in idxs ] - return img_list, trash_img_list - return [], [] - -class FinalLoaderSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - self.include_by_blur = client_dict['include_by_blur'] - - #override - def process_data(self, data): - filepath = Path(data[0]) - - try: - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - self.log_err("%s is not a dfl image file" % (filepath.name)) - return [ 1, [str(filepath)] ] - - bgr = cv2_imread(str(filepath)) - if bgr is None: - raise Exception ("Unable to load %s" % (filepath.name) ) - - gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) - sharpness = estimate_sharpness(gray) if self.include_by_blur else 0 - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) - - hist = cv2.calcHist([gray], [0], None, [256], [0, 256]) - except Exception as e: - self.log_err (e) - return [ 1, [str(filepath)] ] - - return [ 0, [str(filepath), sharpness, hist, yaw ] ] - - #override - def get_data_name (self, data): - #return string identificator of your data - return data[0] - - #override - def __init__(self, img_list, include_by_blur ): - self.img_list = img_list - - self.include_by_blur = include_by_blur - self.result = [] - self.result_trash = [] - - super().__init__('FinalLoader', FinalLoaderSubprocessor.Cli, 60) - - #override - def on_clients_initialized(self): - io.progress_bar ("Loading", len (self.img_list)) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'include_by_blur': self.include_by_blur - } - - #override - def get_data(self, host_dict): - if len (self.img_list) > 0: - return [self.img_list.pop(0)] - - return None - - #override - def on_data_return (self, host_dict, data): - self.img_list.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - if result[0] == 0: - self.result.append (result[1]) - else: - self.result_trash.append (result[1]) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result, self.result_trash - -class FinalHistDissimSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) - - #override - def process_data(self, data): - idx, img_list = data - for i in range( len(img_list) ): - score_total = 0 - for j in range( len(img_list) ): - if i == j: - continue - score_total += cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) - img_list[i][3] = score_total - img_list = sorted(img_list, key=operator.itemgetter(3), reverse=True) - return idx, img_list - - #override - def get_data_name (self, data): - return "Bunch of images" - - #override - def __init__(self, yaws_sample_list ): - self.yaws_sample_list = yaws_sample_list - self.yaws_sample_list_len = len(yaws_sample_list) - - self.yaws_sample_list_idxs = [ i for i in range(self.yaws_sample_list_len) if self.yaws_sample_list[i] is not None ] - self.result = [ None for _ in range(self.yaws_sample_list_len) ] - super().__init__('FinalHistDissimSubprocessor', FinalHistDissimSubprocessor.Cli) - - #override - def process_info_generator(self): - for i in range(min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } - #override - def on_clients_initialized(self): - io.progress_bar ("Sort by hist-dissim", self.yaws_sample_list_len) - - #override - def on_clients_finalized(self): - io.progress_bar_close() - - #override - def get_data(self, host_dict): - if len (self.yaws_sample_list_idxs) > 0: - idx = self.yaws_sample_list_idxs.pop(0) - - return idx, self.yaws_sample_list[idx] - return None - - #override - def on_data_return (self, host_dict, data): - self.yaws_sample_list_idxs.insert(0, data[0]) - - #override - def on_result (self, host_dict, data, result): - idx, yaws_sample_list = data - self.result[idx] = yaws_sample_list - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result - -def sort_final(input_path, include_by_blur=True): - io.log_info ("Performing final sort.") - - target_count = io.input_int ("Target number of images? 
(default:2000) : ", 2000) - - img_list, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path), include_by_blur ).run() - final_img_list = [] - - grads = 128 - imgs_per_grad = round (target_count / grads) - - grads_space = np.linspace (-1.0,1.0,grads) - - yaws_sample_list = [None]*grads - for g in io.progress_bar_generator ( range(grads), "Sort by yaw"): - yaw = grads_space[g] - next_yaw = grads_space[g+1] if g < grads-1 else yaw - - yaw_samples = [] - for img in img_list: - s_yaw = -img[3] - if (g == 0 and s_yaw < next_yaw) or \ - (g < grads-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ - (g == grads-1 and s_yaw >= yaw): - yaw_samples += [ img ] - if len(yaw_samples) > 0: - yaws_sample_list[g] = yaw_samples - - total_lack = 0 - for g in io.progress_bar_generator ( range(grads), ""): - img_list = yaws_sample_list[g] - img_list_len = len(img_list) if img_list is not None else 0 - - lack = imgs_per_grad - img_list_len - total_lack += max(lack, 0) - - imgs_per_grad += total_lack // grads - - if include_by_blur: - sharpned_imgs_per_grad = imgs_per_grad*10 - for g in io.progress_bar_generator ( range (grads), "Sort by blur"): - img_list = yaws_sample_list[g] - if img_list is None: - continue - - img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - - if len(img_list) > sharpned_imgs_per_grad: - trash_img_list += img_list[sharpned_imgs_per_grad:] - img_list = img_list[0:sharpned_imgs_per_grad] - - yaws_sample_list[g] = img_list - - yaws_sample_list = FinalHistDissimSubprocessor(yaws_sample_list).run() - - for g in io.progress_bar_generator ( range (grads), "Fetching best"): - img_list = yaws_sample_list[g] - if img_list is None: - continue - - final_img_list += img_list[0:imgs_per_grad] - trash_img_list += img_list[imgs_per_grad:] - - return final_img_list, trash_img_list - -def final_process(input_path, img_list, trash_img_list): - if len(trash_img_list) != 0: - parent_input_path = input_path.parent - trash_path = parent_input_path / (input_path.stem + '_trash') - trash_path.mkdir (exist_ok=True) - - io.log_info ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) ) - - for filename in Path_utils.get_image_paths(trash_path): - Path(filename).unlink() - - for i in io.progress_bar_generator( range(len(trash_img_list)), "Moving trash", leave=False): - src = Path (trash_img_list[i][0]) - dst = trash_path / src.name - try: - src.rename (dst) - except: - io.log_info ('fail to trashing %s' % (src.name) ) - - io.log_info ("") - - if len(img_list) != 0: - for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming", leave=False): - src = Path (img_list[i][0]) - dst = input_path / ('%.5d_%s' % (i, src.name )) - try: - src.rename (dst) - except: - io.log_info ('fail to rename %s' % (src.name) ) - - for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming"): - src = Path (img_list[i][0]) - src = input_path / ('%.5d_%s' % (i, src.name)) - dst = input_path / ('%.5d%s' % (i, src.suffix)) - try: - src.rename (dst) - except: - io.log_info ('fail to rename %s' % (src.name) ) - - - -def main (input_path, sort_by_method): - input_path = Path(input_path) - sort_by_method = sort_by_method.lower() - - io.log_info ("Running sort tool.\r\n") - - img_list = [] - trash_img_list = [] - if sort_by_method == 'blur': img_list, trash_img_list = sort_by_blur (input_path) - elif sort_by_method == 'face': img_list, trash_img_list = sort_by_face (input_path) - elif sort_by_method == 'face-dissim': img_list, trash_img_list = sort_by_face_dissim 
(input_path) - elif sort_by_method == 'face-yaw': img_list, trash_img_list = sort_by_face_yaw (input_path) - elif sort_by_method == 'face-pitch': img_list, trash_img_list = sort_by_face_pitch (input_path) - elif sort_by_method == 'hist': img_list = sort_by_hist (input_path) - elif sort_by_method == 'hist-dissim': img_list, trash_img_list = sort_by_hist_dissim (input_path) - elif sort_by_method == 'brightness': img_list = sort_by_brightness (input_path) - elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) - elif sort_by_method == 'black': img_list = sort_by_black (input_path) - elif sort_by_method == 'origname': img_list, trash_img_list = sort_by_origname (input_path) - elif sort_by_method == 'oneface': img_list, trash_img_list = sort_by_oneface_in_image (input_path) - elif sort_by_method == 'final': img_list, trash_img_list = sort_final (input_path) - elif sort_by_method == 'final-no-blur': img_list, trash_img_list = sort_final (input_path, include_by_blur=False) - - final_process (input_path, img_list, trash_img_list) +import os +import sys +import operator +import numpy as np +import cv2 +from shutil import copyfile +from pathlib import Path +from utils import Path_utils +from utils.DFLPNG import DFLPNG +from utils.DFLJPG import DFLJPG +from utils.cv2_utils import * +from facelib import LandmarksProcessor +from joblib import Subprocessor +import multiprocessing +from interact import interact as io +from imagelib import estimate_sharpness + +class BlurEstimatorSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + self.log_info('Running on %s.' % (client_dict['device_name']) ) + + #override + def process_data(self, data): + filepath = Path( data[0] ) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is not None: + image = cv2_imread( str(filepath) ) + return [ str(filepath), estimate_sharpness(image) ] + else: + self.log_err ("%s is not a dfl image file" % (filepath.name) ) + return [ str(filepath), 0 ] + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[0] + + #override + def __init__(self, input_data ): + self.input_data = input_data + self.img_list = [] + self.trash_img_list = [] + super().__init__('BlurEstimator', BlurEstimatorSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("", len (self.input_data)) + + #override + def on_clients_finalized(self): + io.progress_bar_close () + + #override + def process_info_generator(self): + for i in range(0, multiprocessing.cpu_count() ): + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + } + + #override + def get_data(self, host_dict): + if len (self.input_data) > 0: + return self.input_data.pop(0) + + return None + + #override + def on_data_return (self, host_dict, data): + self.input_data.insert(0, data) + + #override + def on_result (self, host_dict, data, result): + if result[1] == 0: + self.trash_img_list.append ( result ) + else: + self.img_list.append ( result ) + + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.img_list, self.trash_img_list + + +def sort_by_blur(input_path): + io.log_info ("Sorting by blur...") + + img_list = [ (filename,[]) for filename in Path_utils.get_image_paths(input_path) ] + img_list, trash_img_list = BlurEstimatorSubprocessor (img_list).run() + + io.log_info 
("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list, trash_img_list + +def sort_by_face(input_path): + io.log_info ("Sorting by face similarity...") + + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + img_list.append( [str(filepath), dflimg.get_landmarks()] ) + + + img_list_len = len(img_list) + for i in io.progress_bar_generator ( range(0, img_list_len-1), "Sorting"): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + + fl1 = img_list[i][1] + fl2 = img_list[j][1] + score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) + + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + return img_list, trash_img_list + +def sort_by_face_dissim(input_path): + + io.log_info ("Sorting by face dissimilarity...") + + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + img_list.append( [str(filepath), dflimg.get_landmarks(), 0 ] ) + + img_list_len = len(img_list) + for i in io.progress_bar_generator( range(img_list_len-1), "Sorting"): + score_total = 0 + for j in range(i+1,len(img_list)): + if i == j: + continue + fl1 = img_list[i][1] + fl2 = img_list[j][1] + score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) + + img_list[i][2] = score_total + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) + + return img_list, trash_img_list + +def sort_by_face_yaw(input_path): + io.log_info ("Sorting by face yaw...") + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + pitch_yaw_roll = dflimg.get_pitch_yaw_roll() + if pitch_yaw_roll is not None: + pitch, yaw, roll = pitch_yaw_roll + else: + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) + + img_list.append( [str(filepath), yaw ] ) + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list, trash_img_list + +def sort_by_face_pitch(input_path): + io.log_info ("Sorting by face pitch...") + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if 
filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ( [str(filepath)] ) + continue + + pitch_yaw_roll = dflimg.get_pitch_yaw_roll() + if pitch_yaw_roll is not None: + pitch, yaw, roll = pitch_yaw_roll + else: + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) + + img_list.append( [str(filepath), pitch ] ) + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list, trash_img_list + +class HistSsimSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.log_info ('Running on %s.' % (client_dict['device_name']) ) + + #override + def process_data(self, data): + img_list = [] + for x in data: + img = cv2_imread(x) + img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), + cv2.calcHist([img], [1], None, [256], [0, 256]), + cv2.calcHist([img], [2], None, [256], [0, 256]) + ]) + + img_list_len = len(img_list) + for i in range(img_list_len-1): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + self.progress_bar_inc(1) + + return img_list + + #override + def get_data_name (self, data): + return "Bunch of images" + + #override + def __init__(self, img_list ): + self.img_list = img_list + self.img_list_len = len(img_list) + + slice_count = 20000 + sliced_count = self.img_list_len // slice_count + + if sliced_count > 12: + sliced_count = 11.9 + slice_count = int(self.img_list_len / sliced_count) + sliced_count = self.img_list_len // slice_count + + self.img_chunks_list = [ self.img_list[i*slice_count : (i+1)*slice_count] for i in range(sliced_count) ] + \ + [ self.img_list[sliced_count*slice_count:] ] + + self.result = [] + super().__init__('HistSsim', HistSsimSubprocessor.Cli, 0) + + #override + def process_info_generator(self): + for i in range( len(self.img_chunks_list) ): + yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, + 'device_name': 'CPU%d' % (i) + } + #override + def on_clients_initialized(self): + io.progress_bar ("Sorting", len(self.img_list)) + io.progress_bar_inc(len(self.img_chunks_list)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def get_data(self, host_dict): + if len (self.img_chunks_list) > 0: + return self.img_chunks_list.pop(0) + return None + + #override + def on_data_return (self, host_dict, data): + raise Exception("Fail to process data. 
Decrease number of images and try again.") + + #override + def on_result (self, host_dict, data, result): + self.result += result + return 0 + + #override + def get_result(self): + return self.result + +def sort_by_hist(input_path): + io.log_info ("Sorting by histogram similarity...") + img_list = HistSsimSubprocessor(Path_utils.get_image_paths(input_path)).run() + return img_list + +class HistDissimSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.log_info ('Running on %s.' % (client_dict['device_name']) ) + self.img_list = client_dict['img_list'] + self.img_list_len = len(self.img_list) + + #override + def process_data(self, data): + i = data[0] + score_total = 0 + for j in range( 0, self.img_list_len): + if i == j: + continue + score_total += cv2.compareHist(self.img_list[i][1], self.img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + + return score_total + + #override + def get_data_name (self, data): + #return string identificator of your data + return self.img_list[data[0]][0] + + #override + def __init__(self, img_list ): + self.img_list = img_list + self.img_list_range = [i for i in range(0, len(img_list) )] + self.result = [] + super().__init__('HistDissim', HistDissimSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("Sorting", len (self.img_list) ) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def process_info_generator(self): + for i in range(0, min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + 'img_list' : self.img_list + } + #override + def get_data(self, host_dict): + if len (self.img_list_range) > 0: + return [self.img_list_range.pop(0)] + + return None + + #override + def on_data_return (self, host_dict, data): + self.img_list_range.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + self.img_list[data[0]][2] = result + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.img_list + +def sort_by_hist_dissim(input_path): + io.log_info ("Sorting by histogram dissimilarity...") + + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append ([str(filepath)]) + continue + + image = cv2_imread(str(filepath)) + face_mask = LandmarksProcessor.get_image_hull_mask (image.shape, dflimg.get_landmarks()) + image = (image*face_mask).astype(np.uint8) + + img_list.append ([str(filepath), cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, [256], [0, 256]), 0 ]) + + img_list = HistDissimSubprocessor(img_list).run() + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) + + return img_list, trash_img_list + +def sort_by_brightness(input_path): + io.log_info ("Sorting by brightness...") + img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + return img_list + 
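+# Illustrative sketch (added for clarity; not part of the original patch):
+# sort_by_brightness above and sort_by_hue below use the same ranking trick --
+# convert to HSV and take the mean of a single channel, V (index 2) for
+# brightness and H (index 0) for hue. Assuming `path` is any image path and
+# cv2_imread returns a BGR uint8 image:
+#
+#   hsv = cv2.cvtColor(cv2_imread(path), cv2.COLOR_BGR2HSV)
+#   brightness_key = hsv[..., 2].mean()   # V channel, 0..255
+#   hue_key        = hsv[..., 0].mean()   # H channel, 0..179 in OpenCV
+#
+# Both sorts are descending, so the brightest / highest-mean-hue faces come first.
+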
+def sort_by_hue(input_path): + io.log_info ("Sorting by hue...") + img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + return img_list + +def sort_by_black(input_path): + io.log_info ("Sorting by amount of black pixels...") + + img_list = [] + for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + img = cv2_imread(x) + img_list.append ([x, img[(img == 0)].size ]) + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=False) + + return img_list + +def sort_by_origname(input_path): + io.log_info ("Sort by original filename...") + + img_list = [] + trash_img_list = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + trash_img_list.append( [str(filepath)] ) + continue + + img_list.append( [str(filepath), dflimg.get_source_filename()] ) + + io.log_info ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1)) + return img_list, trash_img_list + +def sort_by_oneface_in_image(input_path): + io.log_info ("Sort by one face in images...") + image_paths = Path_utils.get_image_paths(input_path) + a = np.array ([ ( int(x[0]), int(x[1]) ) \ + for x in [ Path(filepath).stem.split('_') for filepath in image_paths ] if len(x) == 2 + ]) + if len(a) > 0: + idxs = np.ndarray.flatten ( np.argwhere ( a[:,1] != 0 ) ) + idxs = np.unique ( a[idxs][:,0] ) + idxs = np.ndarray.flatten ( np.argwhere ( np.array([ x[0] in idxs for x in a ]) == True ) ) + if len(idxs) > 0: + io.log_info ("Found %d images." % (len(idxs)) ) + img_list = [ (path,) for i,path in enumerate(image_paths) if i not in idxs ] + trash_img_list = [ (image_paths[x],) for x in idxs ] + return img_list, trash_img_list + return [], [] + +class FinalLoaderSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) + self.include_by_blur = client_dict['include_by_blur'] + + #override + def process_data(self, data): + filepath = Path(data[0]) + + try: + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + self.log_err("%s is not a dfl image file" % (filepath.name)) + return [ 1, [str(filepath)] ] + + bgr = cv2_imread(str(filepath)) + if bgr is None: + raise Exception ("Unable to load %s" % (filepath.name) ) + + gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) + sharpness = estimate_sharpness(gray) if self.include_by_blur else 0 + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() ) + + hist = cv2.calcHist([gray], [0], None, [256], [0, 256]) + except Exception as e: + self.log_err (e) + return [ 1, [str(filepath)] ] + + return [ 0, [str(filepath), sharpness, hist, yaw ] ] + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[0] + + #override + def __init__(self, img_list, include_by_blur ): + self.img_list = img_list + + self.include_by_blur = include_by_blur + self.result = [] + self.result_trash = [] + + super().__init__('FinalLoader', FinalLoaderSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("Loading", len (self.img_list)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def process_info_generator(self): + for i in range(0, min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + 'include_by_blur': self.include_by_blur + } + + #override + def get_data(self, host_dict): + if len (self.img_list) > 0: + return [self.img_list.pop(0)] + + return None + + #override + def on_data_return (self, host_dict, data): + self.img_list.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + if result[0] == 0: + self.result.append (result[1]) + else: + self.result_trash.append (result[1]) + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result, self.result_trash + +class FinalHistDissimSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + #override + def on_initialize(self, client_dict): + self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) + + #override + def process_data(self, data): + idx, img_list = data + for i in range( len(img_list) ): + score_total = 0 + for j in range( len(img_list) ): + if i == j: + continue + score_total += cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + img_list[i][3] = score_total + img_list = sorted(img_list, key=operator.itemgetter(3), reverse=True) + return idx, img_list + + #override + def get_data_name (self, data): + return "Bunch of images" + + #override + def __init__(self, yaws_sample_list ): + self.yaws_sample_list = yaws_sample_list + self.yaws_sample_list_len = len(yaws_sample_list) + + self.yaws_sample_list_idxs = [ i for i in range(self.yaws_sample_list_len) if self.yaws_sample_list[i] is not None ] + self.result = [ None for _ in range(self.yaws_sample_list_len) ] + super().__init__('FinalHistDissimSubprocessor', FinalHistDissimSubprocessor.Cli) + + #override + def process_info_generator(self): + for i in range(min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, + 'device_name': 'CPU%d' % (i) + } + #override + def on_clients_initialized(self): + io.progress_bar ("Sort by hist-dissim", self.yaws_sample_list_len) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def get_data(self, host_dict): + if len (self.yaws_sample_list_idxs) > 0: + idx = self.yaws_sample_list_idxs.pop(0) + + return idx, self.yaws_sample_list[idx] + return None + + #override + def on_data_return (self, host_dict, data): + self.yaws_sample_list_idxs.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + idx, yaws_sample_list = data + self.result[idx] = yaws_sample_list + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result + +def sort_final(input_path, include_by_blur=True): + io.log_info ("Performing final sort.") + + target_count = io.input_int ("Target number of images? 
(default:2000) : ", 2000) + + img_list, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path), include_by_blur ).run() + final_img_list = [] + + grads = 128 + imgs_per_grad = round (target_count / grads) + + grads_space = np.linspace (-1.0,1.0,grads) + + yaws_sample_list = [None]*grads + for g in io.progress_bar_generator ( range(grads), "Sort by yaw"): + yaw = grads_space[g] + next_yaw = grads_space[g+1] if g < grads-1 else yaw + + yaw_samples = [] + for img in img_list: + s_yaw = -img[3] + if (g == 0 and s_yaw < next_yaw) or \ + (g < grads-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ + (g == grads-1 and s_yaw >= yaw): + yaw_samples += [ img ] + if len(yaw_samples) > 0: + yaws_sample_list[g] = yaw_samples + + total_lack = 0 + for g in io.progress_bar_generator ( range(grads), ""): + img_list = yaws_sample_list[g] + img_list_len = len(img_list) if img_list is not None else 0 + + lack = imgs_per_grad - img_list_len + total_lack += max(lack, 0) + + imgs_per_grad += total_lack // grads + + if include_by_blur: + sharpned_imgs_per_grad = imgs_per_grad*10 + for g in io.progress_bar_generator ( range (grads), "Sort by blur"): + img_list = yaws_sample_list[g] + if img_list is None: + continue + + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + if len(img_list) > sharpned_imgs_per_grad: + trash_img_list += img_list[sharpned_imgs_per_grad:] + img_list = img_list[0:sharpned_imgs_per_grad] + + yaws_sample_list[g] = img_list + + yaws_sample_list = FinalHistDissimSubprocessor(yaws_sample_list).run() + + for g in io.progress_bar_generator ( range (grads), "Fetching best"): + img_list = yaws_sample_list[g] + if img_list is None: + continue + + final_img_list += img_list[0:imgs_per_grad] + trash_img_list += img_list[imgs_per_grad:] + + return final_img_list, trash_img_list + +def final_process(input_path, img_list, trash_img_list): + if len(trash_img_list) != 0: + parent_input_path = input_path.parent + trash_path = parent_input_path / (input_path.stem + '_trash') + trash_path.mkdir (exist_ok=True) + + io.log_info ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) ) + + for filename in Path_utils.get_image_paths(trash_path): + Path(filename).unlink() + + for i in io.progress_bar_generator( range(len(trash_img_list)), "Moving trash", leave=False): + src = Path (trash_img_list[i][0]) + dst = trash_path / src.name + try: + src.rename (dst) + except: + io.log_info ('fail to trashing %s' % (src.name) ) + + io.log_info ("") + + if len(img_list) != 0: + for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming", leave=False): + src = Path (img_list[i][0]) + dst = input_path / ('%.5d_%s' % (i, src.name )) + try: + src.rename (dst) + except: + io.log_info ('fail to rename %s' % (src.name) ) + + for i in io.progress_bar_generator( [*range(len(img_list))], "Renaming"): + src = Path (img_list[i][0]) + src = input_path / ('%.5d_%s' % (i, src.name)) + dst = input_path / ('%.5d%s' % (i, src.suffix)) + try: + src.rename (dst) + except: + io.log_info ('fail to rename %s' % (src.name) ) + + + +def main (input_path, sort_by_method): + input_path = Path(input_path) + sort_by_method = sort_by_method.lower() + + io.log_info ("Running sort tool.\r\n") + + img_list = [] + trash_img_list = [] + if sort_by_method == 'blur': img_list, trash_img_list = sort_by_blur (input_path) + elif sort_by_method == 'face': img_list, trash_img_list = sort_by_face (input_path) + elif sort_by_method == 'face-dissim': img_list, trash_img_list = sort_by_face_dissim 
(input_path) + elif sort_by_method == 'face-yaw': img_list, trash_img_list = sort_by_face_yaw (input_path) + elif sort_by_method == 'face-pitch': img_list, trash_img_list = sort_by_face_pitch (input_path) + elif sort_by_method == 'hist': img_list = sort_by_hist (input_path) + elif sort_by_method == 'hist-dissim': img_list, trash_img_list = sort_by_hist_dissim (input_path) + elif sort_by_method == 'brightness': img_list = sort_by_brightness (input_path) + elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) + elif sort_by_method == 'black': img_list = sort_by_black (input_path) + elif sort_by_method == 'origname': img_list, trash_img_list = sort_by_origname (input_path) + elif sort_by_method == 'oneface': img_list, trash_img_list = sort_by_oneface_in_image (input_path) + elif sort_by_method == 'final': img_list, trash_img_list = sort_final (input_path) + elif sort_by_method == 'final-no-blur': img_list, trash_img_list = sort_final (input_path, include_by_blur=False) + + final_process (input_path, img_list, trash_img_list) diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py index 71a2ca5..b58e020 100644 --- a/mainscripts/Trainer.py +++ b/mainscripts/Trainer.py @@ -1,324 +1,324 @@ -import sys -import traceback -import queue -import threading -import time -import numpy as np -import itertools -from pathlib import Path -from utils import Path_utils -import imagelib -import cv2 -import models -from interact import interact as io - -def trainerThread (s2c, c2s, args, device_args): - while True: - try: - start_time = time.time() - - training_data_src_path = Path( args.get('training_data_src_dir', '') ) - training_data_dst_path = Path( args.get('training_data_dst_dir', '') ) - - pretraining_data_path = args.get('pretraining_data_dir', '') - pretraining_data_path = Path(pretraining_data_path) if pretraining_data_path is not None else None - - model_path = Path( args.get('model_path', '') ) - model_name = args.get('model_name', '') - save_interval_min = 15 - debug = args.get('debug', '') - execute_programs = args.get('execute_programs', []) - - if not training_data_src_path.exists(): - io.log_err('Training data src directory does not exist.') - break - - if not training_data_dst_path.exists(): - io.log_err('Training data dst directory does not exist.') - break - - if not model_path.exists(): - model_path.mkdir(exist_ok=True) - - model = models.import_model(model_name)( - model_path, - training_data_src_path=training_data_src_path, - training_data_dst_path=training_data_dst_path, - pretraining_data_path=pretraining_data_path, - debug=debug, - device_args=device_args) - - is_reached_goal = model.is_reached_iter_goal() - - shared_state = { 'after_save' : False } - loss_string = "" - save_iter = model.get_iter() - def model_save(): - if not debug and not is_reached_goal: - io.log_info ("Saving....", end='\r') - model.save() - shared_state['after_save'] = True - - def send_preview(): - if not debug: - previews = model.get_previews() - c2s.put ( {'op':'show', 'previews': previews, 'iter':model.get_iter(), 'loss_history': model.get_loss_history().copy() } ) - else: - previews = [( 'debug, press update for new', model.debug_one_iter())] - c2s.put ( {'op':'show', 'previews': previews} ) - - - if model.is_first_run(): - model_save() - - if model.get_target_iter() != 0: - if is_reached_goal: - io.log_info('Model already trained to target iteration. You can use preview.') - else: - io.log_info('Starting. Target iteration: %d. Press "Enter" to stop training and save model.' 
% ( model.get_target_iter() ) ) - else: - io.log_info('Starting. Press "Enter" to stop training and save model.') - - last_save_time = time.time() - - execute_programs = [ [x[0], x[1], time.time() ] for x in execute_programs ] - - for i in itertools.count(0,1): - if not debug: - cur_time = time.time() - - for x in execute_programs: - prog_time, prog, last_time = x - exec_prog = False - if prog_time > 0 and (cur_time - start_time) >= prog_time: - x[0] = 0 - exec_prog = True - elif prog_time < 0 and (cur_time - last_time) >= -prog_time: - x[2] = cur_time - exec_prog = True - - if exec_prog: - try: - exec(prog) - except Exception as e: - print("Unable to execute program: %s" % (prog) ) - - if not is_reached_goal: - iter, iter_time = model.train_one_iter() - - loss_history = model.get_loss_history() - time_str = time.strftime("[%H:%M:%S]") - if iter_time >= 10: - loss_string = "{0}[#{1:06d}][{2:.5s}s]".format ( time_str, iter, '{:0.4f}'.format(iter_time) ) - else: - loss_string = "{0}[#{1:06d}][{2:04d}ms]".format ( time_str, iter, int(iter_time*1000) ) - - if shared_state['after_save']: - shared_state['after_save'] = False - last_save_time = time.time() #upd last_save_time only after save+one_iter, because plaidML rebuilds programs after save https://github.com/plaidml/plaidml/issues/274 - - mean_loss = np.mean ( [ np.array(loss_history[i]) for i in range(save_iter, iter) ], axis=0) - - for loss_value in mean_loss: - loss_string += "[%.4f]" % (loss_value) - - io.log_info (loss_string) - - save_iter = iter - else: - for loss_value in loss_history[-1]: - loss_string += "[%.4f]" % (loss_value) - - if io.is_colab(): - io.log_info ('\r' + loss_string, end='') - else: - io.log_info (loss_string, end='\r') - - if model.get_target_iter() != 0 and model.is_reached_iter_goal(): - io.log_info ('Reached target iteration.') - model_save() - is_reached_goal = True - io.log_info ('You can use preview now.') - - if not is_reached_goal and (time.time() - last_save_time) >= save_interval_min*60: - model_save() - send_preview() - - if i==0: - if is_reached_goal: - model.pass_one_iter() - send_preview() - - if debug: - time.sleep(0.005) - - while not s2c.empty(): - input = s2c.get() - op = input['op'] - if op == 'save': - model_save() - elif op == 'preview': - if is_reached_goal: - model.pass_one_iter() - send_preview() - elif op == 'close': - model_save() - i = -1 - break - - if i == -1: - break - - - - model.finalize() - - except Exception as e: - print ('Error: %s' % (str(e))) - traceback.print_exc() - break - c2s.put ( {'op':'close'} ) - - - -def main(args, device_args): - io.log_info ("Running trainer.\r\n") - - no_preview = args.get('no_preview', False) - - s2c = queue.Queue() - c2s = queue.Queue() - - thread = threading.Thread(target=trainerThread, args=(s2c, c2s, args, device_args) ) - thread.start() - - if no_preview: - while True: - if not c2s.empty(): - input = c2s.get() - op = input.get('op','') - if op == 'close': - break - try: - io.process_messages(0.1) - except KeyboardInterrupt: - s2c.put ( {'op': 'close'} ) - else: - wnd_name = "Training preview" - io.named_window(wnd_name) - io.capture_keys(wnd_name) - - previews = None - loss_history = None - selected_preview = 0 - update_preview = False - is_showing = False - is_waiting_preview = False - show_last_history_iters_count = 0 - iter = 0 - while True: - if not c2s.empty(): - input = c2s.get() - op = input['op'] - if op == 'show': - is_waiting_preview = False - loss_history = input['loss_history'] if 'loss_history' in input.keys() else None - 
previews = input['previews'] if 'previews' in input.keys() else None - iter = input['iter'] if 'iter' in input.keys() else 0 - if previews is not None: - max_w = 0 - max_h = 0 - for (preview_name, preview_rgb) in previews: - (h, w, c) = preview_rgb.shape - max_h = max (max_h, h) - max_w = max (max_w, w) - - max_size = 800 - if max_h > max_size: - max_w = int( max_w / (max_h / max_size) ) - max_h = max_size - - #make all previews size equal - for preview in previews[:]: - (preview_name, preview_rgb) = preview - (h, w, c) = preview_rgb.shape - if h != max_h or w != max_w: - previews.remove(preview) - previews.append ( (preview_name, cv2.resize(preview_rgb, (max_w, max_h))) ) - selected_preview = selected_preview % len(previews) - update_preview = True - elif op == 'close': - break - - if update_preview: - update_preview = False - - selected_preview_name = previews[selected_preview][0] - selected_preview_rgb = previews[selected_preview][1] - (h,w,c) = selected_preview_rgb.shape - - # HEAD - head_lines = [ - '[s]:save [enter]:exit', - '[p]:update [space]:next preview [l]:change history range', - 'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) ) - ] - head_line_height = 15 - head_height = len(head_lines) * head_line_height - head = np.ones ( (head_height,w,c) ) * 0.1 - - for i in range(0, len(head_lines)): - t = i*head_line_height - b = (i+1)*head_line_height - head[t:b, 0:w] += imagelib.get_text_image ( (head_line_height,w,c) , head_lines[i], color=[0.8]*c ) - - final = head - - if loss_history is not None: - if show_last_history_iters_count == 0: - loss_history_to_show = loss_history - else: - loss_history_to_show = loss_history[-show_last_history_iters_count:] - - lh_img = models.ModelBase.get_loss_history_preview(loss_history_to_show, iter, w, c) - final = np.concatenate ( [final, lh_img], axis=0 ) - - final = np.concatenate ( [final, selected_preview_rgb], axis=0 ) - final = np.clip(final, 0, 1) - - io.show_image( wnd_name, (final*255).astype(np.uint8) ) - is_showing = True - - key_events = io.get_key_events(wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - - if key == ord('\n') or key == ord('\r'): - s2c.put ( {'op': 'close'} ) - elif key == ord('s'): - s2c.put ( {'op': 'save'} ) - elif key == ord('p'): - if not is_waiting_preview: - is_waiting_preview = True - s2c.put ( {'op': 'preview'} ) - elif key == ord('l'): - if show_last_history_iters_count == 0: - show_last_history_iters_count = 5000 - elif show_last_history_iters_count == 5000: - show_last_history_iters_count = 10000 - elif show_last_history_iters_count == 10000: - show_last_history_iters_count = 50000 - elif show_last_history_iters_count == 50000: - show_last_history_iters_count = 100000 - elif show_last_history_iters_count == 100000: - show_last_history_iters_count = 0 - update_preview = True - elif key == ord(' '): - selected_preview = (selected_preview + 1) % len(previews) - update_preview = True - - try: - io.process_messages(0.1) - except KeyboardInterrupt: - s2c.put ( {'op': 'close'} ) - - io.destroy_all_windows() +import sys +import traceback +import queue +import threading +import time +import numpy as np +import itertools +from pathlib import Path +from utils import Path_utils +import imagelib +import cv2 +import models +from interact import interact as io + +def trainerThread (s2c, c2s, args, device_args): + while True: + try: + start_time = time.time() + + training_data_src_path = Path( 
args.get('training_data_src_dir', '') ) + training_data_dst_path = Path( args.get('training_data_dst_dir', '') ) + + pretraining_data_path = args.get('pretraining_data_dir', '') + pretraining_data_path = Path(pretraining_data_path) if pretraining_data_path is not None else None + + model_path = Path( args.get('model_path', '') ) + model_name = args.get('model_name', '') + save_interval_min = 15 + debug = args.get('debug', '') + execute_programs = args.get('execute_programs', []) + + if not training_data_src_path.exists(): + io.log_err('Training data src directory does not exist.') + break + + if not training_data_dst_path.exists(): + io.log_err('Training data dst directory does not exist.') + break + + if not model_path.exists(): + model_path.mkdir(exist_ok=True) + + model = models.import_model(model_name)( + model_path, + training_data_src_path=training_data_src_path, + training_data_dst_path=training_data_dst_path, + pretraining_data_path=pretraining_data_path, + debug=debug, + device_args=device_args) + + is_reached_goal = model.is_reached_iter_goal() + + shared_state = { 'after_save' : False } + loss_string = "" + save_iter = model.get_iter() + def model_save(): + if not debug and not is_reached_goal: + io.log_info ("Saving....", end='\r') + model.save() + shared_state['after_save'] = True + + def send_preview(): + if not debug: + previews = model.get_previews() + c2s.put ( {'op':'show', 'previews': previews, 'iter':model.get_iter(), 'loss_history': model.get_loss_history().copy() } ) + else: + previews = [( 'debug, press update for new', model.debug_one_iter())] + c2s.put ( {'op':'show', 'previews': previews} ) + + + if model.is_first_run(): + model_save() + + if model.get_target_iter() != 0: + if is_reached_goal: + io.log_info('Model already trained to target iteration. You can use preview.') + else: + io.log_info('Starting. Target iteration: %d. Press "Enter" to stop training and save model.' % ( model.get_target_iter() ) ) + else: + io.log_info('Starting. 
Press "Enter" to stop training and save model.') + + last_save_time = time.time() + + execute_programs = [ [x[0], x[1], time.time() ] for x in execute_programs ] + + for i in itertools.count(0,1): + if not debug: + cur_time = time.time() + + for x in execute_programs: + prog_time, prog, last_time = x + exec_prog = False + if prog_time > 0 and (cur_time - start_time) >= prog_time: + x[0] = 0 + exec_prog = True + elif prog_time < 0 and (cur_time - last_time) >= -prog_time: + x[2] = cur_time + exec_prog = True + + if exec_prog: + try: + exec(prog) + except Exception as e: + print("Unable to execute program: %s" % (prog) ) + + if not is_reached_goal: + iter, iter_time = model.train_one_iter() + + loss_history = model.get_loss_history() + time_str = time.strftime("[%H:%M:%S]") + if iter_time >= 10: + loss_string = "{0}[#{1:06d}][{2:.5s}s]".format ( time_str, iter, '{:0.4f}'.format(iter_time) ) + else: + loss_string = "{0}[#{1:06d}][{2:04d}ms]".format ( time_str, iter, int(iter_time*1000) ) + + if shared_state['after_save']: + shared_state['after_save'] = False + last_save_time = time.time() #upd last_save_time only after save+one_iter, because plaidML rebuilds programs after save https://github.com/plaidml/plaidml/issues/274 + + mean_loss = np.mean ( [ np.array(loss_history[i]) for i in range(save_iter, iter) ], axis=0) + + for loss_value in mean_loss: + loss_string += "[%.4f]" % (loss_value) + + io.log_info (loss_string) + + save_iter = iter + else: + for loss_value in loss_history[-1]: + loss_string += "[%.4f]" % (loss_value) + + if io.is_colab(): + io.log_info ('\r' + loss_string, end='') + else: + io.log_info (loss_string, end='\r') + + if model.get_target_iter() != 0 and model.is_reached_iter_goal(): + io.log_info ('Reached target iteration.') + model_save() + is_reached_goal = True + io.log_info ('You can use preview now.') + + if not is_reached_goal and (time.time() - last_save_time) >= save_interval_min*60: + model_save() + send_preview() + + if i==0: + if is_reached_goal: + model.pass_one_iter() + send_preview() + + if debug: + time.sleep(0.005) + + while not s2c.empty(): + input = s2c.get() + op = input['op'] + if op == 'save': + model_save() + elif op == 'preview': + if is_reached_goal: + model.pass_one_iter() + send_preview() + elif op == 'close': + model_save() + i = -1 + break + + if i == -1: + break + + + + model.finalize() + + except Exception as e: + print ('Error: %s' % (str(e))) + traceback.print_exc() + break + c2s.put ( {'op':'close'} ) + + + +def main(args, device_args): + io.log_info ("Running trainer.\r\n") + + no_preview = args.get('no_preview', False) + + s2c = queue.Queue() + c2s = queue.Queue() + + thread = threading.Thread(target=trainerThread, args=(s2c, c2s, args, device_args) ) + thread.start() + + if no_preview: + while True: + if not c2s.empty(): + input = c2s.get() + op = input.get('op','') + if op == 'close': + break + try: + io.process_messages(0.1) + except KeyboardInterrupt: + s2c.put ( {'op': 'close'} ) + else: + wnd_name = "Training preview" + io.named_window(wnd_name) + io.capture_keys(wnd_name) + + previews = None + loss_history = None + selected_preview = 0 + update_preview = False + is_showing = False + is_waiting_preview = False + show_last_history_iters_count = 0 + iter = 0 + while True: + if not c2s.empty(): + input = c2s.get() + op = input['op'] + if op == 'show': + is_waiting_preview = False + loss_history = input['loss_history'] if 'loss_history' in input.keys() else None + previews = input['previews'] if 'previews' in input.keys() else None + 
iter = input['iter'] if 'iter' in input.keys() else 0 + if previews is not None: + max_w = 0 + max_h = 0 + for (preview_name, preview_rgb) in previews: + (h, w, c) = preview_rgb.shape + max_h = max (max_h, h) + max_w = max (max_w, w) + + max_size = 800 + if max_h > max_size: + max_w = int( max_w / (max_h / max_size) ) + max_h = max_size + + #make all previews size equal + for preview in previews[:]: + (preview_name, preview_rgb) = preview + (h, w, c) = preview_rgb.shape + if h != max_h or w != max_w: + previews.remove(preview) + previews.append ( (preview_name, cv2.resize(preview_rgb, (max_w, max_h))) ) + selected_preview = selected_preview % len(previews) + update_preview = True + elif op == 'close': + break + + if update_preview: + update_preview = False + + selected_preview_name = previews[selected_preview][0] + selected_preview_rgb = previews[selected_preview][1] + (h,w,c) = selected_preview_rgb.shape + + # HEAD + head_lines = [ + '[s]:save [enter]:exit', + '[p]:update [space]:next preview [l]:change history range', + 'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) ) + ] + head_line_height = 15 + head_height = len(head_lines) * head_line_height + head = np.ones ( (head_height,w,c) ) * 0.1 + + for i in range(0, len(head_lines)): + t = i*head_line_height + b = (i+1)*head_line_height + head[t:b, 0:w] += imagelib.get_text_image ( (head_line_height,w,c) , head_lines[i], color=[0.8]*c ) + + final = head + + if loss_history is not None: + if show_last_history_iters_count == 0: + loss_history_to_show = loss_history + else: + loss_history_to_show = loss_history[-show_last_history_iters_count:] + + lh_img = models.ModelBase.get_loss_history_preview(loss_history_to_show, iter, w, c) + final = np.concatenate ( [final, lh_img], axis=0 ) + + final = np.concatenate ( [final, selected_preview_rgb], axis=0 ) + final = np.clip(final, 0, 1) + + io.show_image( wnd_name, (final*255).astype(np.uint8) ) + is_showing = True + + key_events = io.get_key_events(wnd_name) + key, chr_key, ctrl_pressed, alt_pressed, shift_pressed = key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) + + if key == ord('\n') or key == ord('\r'): + s2c.put ( {'op': 'close'} ) + elif key == ord('s'): + s2c.put ( {'op': 'save'} ) + elif key == ord('p'): + if not is_waiting_preview: + is_waiting_preview = True + s2c.put ( {'op': 'preview'} ) + elif key == ord('l'): + if show_last_history_iters_count == 0: + show_last_history_iters_count = 5000 + elif show_last_history_iters_count == 5000: + show_last_history_iters_count = 10000 + elif show_last_history_iters_count == 10000: + show_last_history_iters_count = 50000 + elif show_last_history_iters_count == 50000: + show_last_history_iters_count = 100000 + elif show_last_history_iters_count == 100000: + show_last_history_iters_count = 0 + update_preview = True + elif key == ord(' '): + selected_preview = (selected_preview + 1) % len(previews) + update_preview = True + + try: + io.process_messages(0.1) + except KeyboardInterrupt: + s2c.put ( {'op': 'close'} ) + + io.destroy_all_windows() diff --git a/mainscripts/Util.py b/mainscripts/Util.py index 1921287..389999a 100644 --- a/mainscripts/Util.py +++ b/mainscripts/Util.py @@ -1,156 +1,156 @@ -import cv2 -from pathlib import Path -from utils import Path_utils -from utils.DFLPNG import DFLPNG -from utils.DFLJPG import DFLJPG -from utils.cv2_utils import * -from facelib import LandmarksProcessor -from interact import interact as io - -def remove_fanseg_file (filepath): - filepath = Path(filepath) 
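The suffix dispatch that follows in remove_fanseg_file, and that repeats in sort_by_origname, add_landmarks_debug_images, and recover_original_aligned_filename, could be factored into a single helper. A minimal sketch, not part of this patch; the helper name load_dfl_image is hypothetical, while DFLPNG.load and DFLJPG.load are the loaders already used above:

    from pathlib import Path
    from utils.DFLPNG import DFLPNG
    from utils.DFLJPG import DFLJPG

    def load_dfl_image(filepath):
        # Returns the embedded DFL metadata object, or None if the file
        # is not a .png/.jpg or carries no DFL data.
        filepath = Path(filepath)
        if filepath.suffix == '.png':
            return DFLPNG.load( str(filepath) )
        if filepath.suffix == '.jpg':
            return DFLJPG.load( str(filepath) )
        return None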
- - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - return - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - return - - dflimg.remove_fanseg_mask() - dflimg.embed_and_set( str(filepath) ) - - -def remove_fanseg_folder(input_path): - input_path = Path(input_path) - - io.log_info ("Removing fanseg mask...\r\n") - - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): - filepath = Path(filepath) - remove_fanseg_file(filepath) - -def convert_png_to_jpg_file (filepath): - filepath = Path(filepath) - - if filepath.suffix != '.png': - return - - dflpng = DFLPNG.load (str(filepath) ) - if dflpng is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - return - - dfl_dict = dflpng.getDFLDictData() - - img = cv2_imread (str(filepath)) - new_filepath = str(filepath.parent / (filepath.stem + '.jpg')) - cv2_imwrite ( new_filepath, img, [int(cv2.IMWRITE_JPEG_QUALITY), 85]) - - DFLJPG.embed_data( new_filepath, - face_type=dfl_dict.get('face_type', None), - landmarks=dfl_dict.get('landmarks', None), - ie_polys=dfl_dict.get('ie_polys', None), - source_filename=dfl_dict.get('source_filename', None), - source_rect=dfl_dict.get('source_rect', None), - source_landmarks=dfl_dict.get('source_landmarks', None) ) - - filepath.unlink() - -def convert_png_to_jpg_folder (input_path): - input_path = Path(input_path) - - io.log_info ("Converting PNG to JPG...\r\n") - - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Converting"): - filepath = Path(filepath) - convert_png_to_jpg_file(filepath) - -def add_landmarks_debug_images(input_path): - io.log_info ("Adding landmarks debug images...") - - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): - filepath = Path(filepath) - - img = cv2_imread(str(filepath)) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - - if img is not None: - face_landmarks = dflimg.get_landmarks() - LandmarksProcessor.draw_landmarks(img, face_landmarks, transparent_mask=True, ie_polys=dflimg.get_ie_polys() ) - - output_file = '{}{}'.format( str(Path(str(input_path)) / filepath.stem), '_debug.jpg') - cv2_imwrite(output_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) - -def recover_original_aligned_filename(input_path): - io.log_info ("Recovering original aligned filename...") - - files = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): - filepath = Path(filepath) - - if filepath.suffix == '.png': - dflimg = DFLPNG.load( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - - files += [ [filepath, None, dflimg.get_source_filename(), False] ] - - files_len = len(files) - for i in io.progress_bar_generator( range(files_len), "Sorting" ): - fp, _, sf, converted = files[i] - - if converted: - continue - - sf_stem = Path(sf).stem - - files[i][1] = fp.parent / ( sf_stem + '_0' + fp.suffix ) - files[i][3] = True - c = 1 - - for j in range(i+1, files_len): - fp_j, _, sf_j, converted_j = files[j] - 
if converted_j: - continue - - if sf_j == sf: - files[j][1] = fp_j.parent / ( sf_stem + ('_%d' % (c)) + fp_j.suffix ) - files[j][3] = True - c += 1 - - for file in io.progress_bar_generator( files, "Renaming", leave=False ): - fs, _, _, _ = file - dst = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) - try: - fs.rename (dst) - except: - io.log_err ('fail to rename %s' % (fs.name) ) - - for file in io.progress_bar_generator( files, "Renaming" ): - fs, fd, _, _ = file - fs = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) - try: - fs.rename (fd) - except: - io.log_err ('fail to rename %s' % (fs.name) ) +import cv2 +from pathlib import Path +from utils import Path_utils +from utils.DFLPNG import DFLPNG +from utils.DFLJPG import DFLJPG +from utils.cv2_utils import * +from facelib import LandmarksProcessor +from interact import interact as io + +def remove_fanseg_file (filepath): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + return + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + return + + dflimg.remove_fanseg_mask() + dflimg.embed_and_set( str(filepath) ) + + +def remove_fanseg_folder(input_path): + input_path = Path(input_path) + + io.log_info ("Removing fanseg mask...\r\n") + + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): + filepath = Path(filepath) + remove_fanseg_file(filepath) + +def convert_png_to_jpg_file (filepath): + filepath = Path(filepath) + + if filepath.suffix != '.png': + return + + dflpng = DFLPNG.load (str(filepath) ) + if dflpng is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + return + + dfl_dict = dflpng.getDFLDictData() + + img = cv2_imread (str(filepath)) + new_filepath = str(filepath.parent / (filepath.stem + '.jpg')) + cv2_imwrite ( new_filepath, img, [int(cv2.IMWRITE_JPEG_QUALITY), 85]) + + DFLJPG.embed_data( new_filepath, + face_type=dfl_dict.get('face_type', None), + landmarks=dfl_dict.get('landmarks', None), + ie_polys=dfl_dict.get('ie_polys', None), + source_filename=dfl_dict.get('source_filename', None), + source_rect=dfl_dict.get('source_rect', None), + source_landmarks=dfl_dict.get('source_landmarks', None) ) + + filepath.unlink() + +def convert_png_to_jpg_folder (input_path): + input_path = Path(input_path) + + io.log_info ("Converting PNG to JPG...\r\n") + + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Converting"): + filepath = Path(filepath) + convert_png_to_jpg_file(filepath) + +def add_landmarks_debug_images(input_path): + io.log_info ("Adding landmarks debug images...") + + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + filepath = Path(filepath) + + img = cv2_imread(str(filepath)) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + + if img is not None: + face_landmarks = dflimg.get_landmarks() + LandmarksProcessor.draw_landmarks(img, face_landmarks, transparent_mask=True, ie_polys=dflimg.get_ie_polys() ) + + output_file = '{}{}'.format( str(Path(str(input_path)) / filepath.stem), '_debug.jpg') + cv2_imwrite(output_file, img, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) + +def 
recover_original_aligned_filename(input_path): + io.log_info ("Recovering original aligned filename...") + + files = [] + for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + filepath = Path(filepath) + + if filepath.suffix == '.png': + dflimg = DFLPNG.load( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + + if dflimg is None: + io.log_err ("%s is not a dfl image file" % (filepath.name) ) + continue + + files += [ [filepath, None, dflimg.get_source_filename(), False] ] + + files_len = len(files) + for i in io.progress_bar_generator( range(files_len), "Sorting" ): + fp, _, sf, converted = files[i] + + if converted: + continue + + sf_stem = Path(sf).stem + + files[i][1] = fp.parent / ( sf_stem + '_0' + fp.suffix ) + files[i][3] = True + c = 1 + + for j in range(i+1, files_len): + fp_j, _, sf_j, converted_j = files[j] + if converted_j: + continue + + if sf_j == sf: + files[j][1] = fp_j.parent / ( sf_stem + ('_%d' % (c)) + fp_j.suffix ) + files[j][3] = True + c += 1 + + for file in io.progress_bar_generator( files, "Renaming", leave=False ): + fs, _, _, _ = file + dst = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) + try: + fs.rename (dst) + except: + io.log_err ('fail to rename %s' % (fs.name) ) + + for file in io.progress_bar_generator( files, "Renaming" ): + fs, fd, _, _ = file + fs = fs.parent / ( fs.stem + '_tmp' + fs.suffix ) + try: + fs.rename (fd) + except: + io.log_err ('fail to rename %s' % (fs.name) ) diff --git a/mainscripts/VideoEd.py b/mainscripts/VideoEd.py index 28e69d4..2951a06 100644 --- a/mainscripts/VideoEd.py +++ b/mainscripts/VideoEd.py @@ -1,199 +1,199 @@ -import subprocess -import numpy as np -import ffmpeg -from pathlib import Path -from utils import Path_utils -from interact import interact as io - -def extract_video(input_file, output_dir, output_ext=None, fps=None): - input_file_path = Path(input_file) - output_path = Path(output_dir) - - if not output_path.exists(): - output_path.mkdir(exist_ok=True) - - - if input_file_path.suffix == '.*': - input_file_path = Path_utils.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) - else: - if not input_file_path.exists(): - input_file_path = None - - if input_file_path is None: - io.log_err("input_file not found.") - return - - if fps is None: - fps = io.input_int ("Enter FPS ( ?:help skip:fullfps ) : ", 0, help_message="How many frames of every second of the video will be extracted.") - - if output_ext is None: - output_ext = io.input_str ("Output image format? 
( jpg png ?:help skip:png ) : ", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") - - for filename in Path_utils.get_image_paths (output_path, ['.'+output_ext]): - Path(filename).unlink() - - job = ffmpeg.input(str(input_file_path)) - - kwargs = {'pix_fmt': 'rgb24'} - if fps != 0: - kwargs.update ({'r':str(fps)}) - - if output_ext == 'jpg': - kwargs.update ({'q:v':'2'}) #highest quality for jpg - - job = job.output( str (output_path / ('%5d.'+output_ext)), **kwargs ) - - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) - -def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, bitrate=None): - input_file_path = Path(input_file) - if input_file_path is None: - io.log_err("input_file not found.") - return - - output_file_path = input_file_path.parent / (input_file_path.stem + "_cut" + input_file_path.suffix) - - if from_time is None: - from_time = io.input_str ("From time (skip: 00:00:00.000) : ", "00:00:00.000") - - if to_time is None: - to_time = io.input_str ("To time (skip: 00:00:00.000) : ", "00:00:00.000") - - if audio_track_id is None: - audio_track_id = io.input_int ("Specify audio track id. ( skip:0 ) : ", 0) - - if bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:25) : ", 25) ) - - kwargs = {"c:v": "libx264", - "b:v": "%dM" %(bitrate), - "pix_fmt": "yuv420p", - } - - job = ffmpeg.input(str(input_file_path), ss=from_time, to=to_time) - - job_v = job['v:0'] - job_a = job['a:' + str(audio_track_id) + '?' ] - - job = ffmpeg.output(job_v, job_a, str(output_file_path), **kwargs).overwrite_output() - - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) - -def denoise_image_sequence( input_dir, ext=None, factor=None ): - input_path = Path(input_dir) - - if not input_path.exists(): - io.log_err("input_dir not found.") - return - - if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") - - if factor is None: - factor = np.clip ( io.input_int ("Denoise factor? (1-20 default:5) : ", 5), 1, 20 ) - - job = ( ffmpeg - .input(str ( input_path / ('%5d.'+ext) ) ) - .filter("hqdn3d", factor, factor, 5,5) - .output(str ( input_path / ('%5d.'+ext) ) ) - ) - - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) - -def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ): - input_path = Path(input_dir) - output_file_path = Path(output_file) - reference_file_path = Path(reference_file) if reference_file is not None else None - - if not input_path.exists(): - io.log_err("input_dir not found.") - return - - if not output_file_path.parent.exists(): - output_file_path.parent.mkdir(parents=True, exist_ok=True) - return - - out_ext = output_file_path.suffix - - if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") - - if lossless is None: - lossless = io.input_bool ("Use lossless codec ? 
( default:no ) : ", False) - - video_id = None - audio_id = None - ref_in_a = None - if reference_file_path is not None: - if reference_file_path.suffix == '.*': - reference_file_path = Path_utils.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) - else: - if not reference_file_path.exists(): - reference_file_path = None - - if reference_file_path is None: - io.log_err("reference_file not found.") - return - - #probing reference file - probe = ffmpeg.probe (str(reference_file_path)) - - #getting first video and audio streams id with fps - for stream in probe['streams']: - if video_id is None and stream['codec_type'] == 'video': - video_id = stream['index'] - fps = stream['r_frame_rate'] - - if audio_id is None and stream['codec_type'] == 'audio': - audio_id = stream['index'] - - if audio_id is not None: - #has audio track - ref_in_a = ffmpeg.input (str(reference_file_path))[str(audio_id)] - - if fps is None: - #if fps not specified and not overwritten by reference-file - fps = max (1, io.input_int ("FPS ? (default:25) : ", 25) ) - - if not lossless and bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:16) : ", 16) ) - - i_in = ffmpeg.input(str (input_path / ('%5d.'+ext)), r=fps) - - output_args = [i_in] - - if ref_in_a is not None: - output_args += [ref_in_a] - - output_args += [str (output_file_path)] - - output_kwargs = {} - - if lossless: - output_kwargs.update ({"c:v": "png" - }) - else: - output_kwargs.update ({"c:v": "libx264", - "b:v": "%dM" %(bitrate), - "pix_fmt": "yuv420p", - }) - - output_kwargs.update ({"c:a": "aac", - "b:a": "192k", - "ar" : "48000" - }) - - job = ( ffmpeg.output(*output_args, **output_kwargs).overwrite_output() ) - try: - job = job.run() - except: - io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) +import subprocess +import numpy as np +import ffmpeg +from pathlib import Path +from utils import Path_utils +from interact import interact as io + +def extract_video(input_file, output_dir, output_ext=None, fps=None): + input_file_path = Path(input_file) + output_path = Path(output_dir) + + if not output_path.exists(): + output_path.mkdir(exist_ok=True) + + + if input_file_path.suffix == '.*': + input_file_path = Path_utils.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) + else: + if not input_file_path.exists(): + input_file_path = None + + if input_file_path is None: + io.log_err("input_file not found.") + return + + if fps is None: + fps = io.input_int ("Enter FPS ( ?:help skip:fullfps ) : ", 0, help_message="How many frames of every second of the video will be extracted.") + + if output_ext is None: + output_ext = io.input_str ("Output image format? 
( jpg png ?:help skip:png ) : ", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") + + for filename in Path_utils.get_image_paths (output_path, ['.'+output_ext]): + Path(filename).unlink() + + job = ffmpeg.input(str(input_file_path)) + + kwargs = {'pix_fmt': 'rgb24'} + if fps != 0: + kwargs.update ({'r':str(fps)}) + + if output_ext == 'jpg': + kwargs.update ({'q:v':'2'}) #highest quality for jpg + + job = job.output( str (output_path / ('%5d.'+output_ext)), **kwargs ) + + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) + +def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, bitrate=None): + input_file_path = Path(input_file) + if input_file_path is None: + io.log_err("input_file not found.") + return + + output_file_path = input_file_path.parent / (input_file_path.stem + "_cut" + input_file_path.suffix) + + if from_time is None: + from_time = io.input_str ("From time (skip: 00:00:00.000) : ", "00:00:00.000") + + if to_time is None: + to_time = io.input_str ("To time (skip: 00:00:00.000) : ", "00:00:00.000") + + if audio_track_id is None: + audio_track_id = io.input_int ("Specify audio track id. ( skip:0 ) : ", 0) + + if bitrate is None: + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:25) : ", 25) ) + + kwargs = {"c:v": "libx264", + "b:v": "%dM" %(bitrate), + "pix_fmt": "yuv420p", + } + + job = ffmpeg.input(str(input_file_path), ss=from_time, to=to_time) + + job_v = job['v:0'] + job_a = job['a:' + str(audio_track_id) + '?' ] + + job = ffmpeg.output(job_v, job_a, str(output_file_path), **kwargs).overwrite_output() + + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) + +def denoise_image_sequence( input_dir, ext=None, factor=None ): + input_path = Path(input_dir) + + if not input_path.exists(): + io.log_err("input_dir not found.") + return + + if ext is None: + ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + + if factor is None: + factor = np.clip ( io.input_int ("Denoise factor? (1-20 default:5) : ", 5), 1, 20 ) + + job = ( ffmpeg + .input(str ( input_path / ('%5d.'+ext) ) ) + .filter("hqdn3d", factor, factor, 5,5) + .output(str ( input_path / ('%5d.'+ext) ) ) + ) + + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) + +def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ): + input_path = Path(input_dir) + output_file_path = Path(output_file) + reference_file_path = Path(reference_file) if reference_file is not None else None + + if not input_path.exists(): + io.log_err("input_dir not found.") + return + + if not output_file_path.parent.exists(): + output_file_path.parent.mkdir(parents=True, exist_ok=True) + return + + out_ext = output_file_path.suffix + + if ext is None: + ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + + if lossless is None: + lossless = io.input_bool ("Use lossless codec ? 
( default:no ) : ", False) + + video_id = None + audio_id = None + ref_in_a = None + if reference_file_path is not None: + if reference_file_path.suffix == '.*': + reference_file_path = Path_utils.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) + else: + if not reference_file_path.exists(): + reference_file_path = None + + if reference_file_path is None: + io.log_err("reference_file not found.") + return + + #probing reference file + probe = ffmpeg.probe (str(reference_file_path)) + + #getting first video and audio streams id with fps + for stream in probe['streams']: + if video_id is None and stream['codec_type'] == 'video': + video_id = stream['index'] + fps = stream['r_frame_rate'] + + if audio_id is None and stream['codec_type'] == 'audio': + audio_id = stream['index'] + + if audio_id is not None: + #has audio track + ref_in_a = ffmpeg.input (str(reference_file_path))[str(audio_id)] + + if fps is None: + #if fps not specified and not overwritten by reference-file + fps = max (1, io.input_int ("FPS ? (default:25) : ", 25) ) + + if not lossless and bitrate is None: + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:16) : ", 16) ) + + i_in = ffmpeg.input(str (input_path / ('%5d.'+ext)), r=fps) + + output_args = [i_in] + + if ref_in_a is not None: + output_args += [ref_in_a] + + output_args += [str (output_file_path)] + + output_kwargs = {} + + if lossless: + output_kwargs.update ({"c:v": "png" + }) + else: + output_kwargs.update ({"c:v": "libx264", + "b:v": "%dM" %(bitrate), + "pix_fmt": "yuv420p", + }) + + output_kwargs.update ({"c:a": "aac", + "b:a": "192k", + "ar" : "48000" + }) + + job = ( ffmpeg.output(*output_args, **output_kwargs).overwrite_output() ) + try: + job = job.run() + except: + io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) diff --git a/mathlib/__init__.py b/mathlib/__init__.py index 50061ee..a11e725 100644 --- a/mathlib/__init__.py +++ b/mathlib/__init__.py @@ -1,25 +1,25 @@ -import numpy as np -import math -from .umeyama import umeyama - -def get_power_of_two(x): - i = 0 - while (1 << i) < x: - i += 1 - return i - -def rotationMatrixToEulerAngles(R) : - sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) - singular = sy < 1e-6 - if not singular : - x = math.atan2(R[2,1] , R[2,2]) - y = math.atan2(-R[2,0], sy) - z = math.atan2(R[1,0], R[0,0]) - else : - x = math.atan2(-R[1,2], R[1,1]) - y = math.atan2(-R[2,0], sy) - z = 0 - return np.array([x, y, z]) - -def polygon_area(x,y): - return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) +import numpy as np +import math +from .umeyama import umeyama + +def get_power_of_two(x): + i = 0 + while (1 << i) < x: + i += 1 + return i + +def rotationMatrixToEulerAngles(R) : + sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) + singular = sy < 1e-6 + if not singular : + x = math.atan2(R[2,1] , R[2,2]) + y = math.atan2(-R[2,0], sy) + z = math.atan2(R[1,0], R[0,0]) + else : + x = math.atan2(-R[1,2], R[1,1]) + y = math.atan2(-R[2,0], sy) + z = 0 + return np.array([x, y, z]) + +def polygon_area(x,y): + return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) diff --git a/mathlib/umeyama.py b/mathlib/umeyama.py index 2c6491a..7c6b2d0 100644 --- a/mathlib/umeyama.py +++ b/mathlib/umeyama.py @@ -1,71 +1,71 @@ -import numpy as np - -def umeyama(src, dst, estimate_scale): - """Estimate N-D similarity transformation with or without scaling. - Parameters - ---------- - src : (M, N) array - Source coordinates. - dst : (M, N) array - Destination coordinates. 
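A minimal usage sketch of umeyama for the 2-D landmark case described by this docstring; the input arrays are hypothetical placeholders:

    import numpy as np
    from mathlib.umeyama import umeyama

    src = np.random.rand(68, 2)                   # e.g. detected 2-D landmarks
    dst = np.random.rand(68, 2)                   # e.g. reference landmark positions
    T = umeyama(src, dst, estimate_scale=True)    # (3, 3) homogeneous similarity matrix
    src_h = np.hstack([src, np.ones((68, 1))])    # homogeneous coordinates
    mapped = (T @ src_h.T).T[:, :2]               # src carried into dst's frame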
- estimate_scale : bool - Whether to estimate scaling factor. - Returns - ------- - T : (N + 1, N + 1) - The homogeneous similarity transformation matrix. The matrix contains - NaN values only if the problem is not well-conditioned. - References - ---------- - .. [1] "Least-squares estimation of transformation parameters between two - point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 - """ - - num = src.shape[0] - dim = src.shape[1] - - # Compute mean of src and dst. - src_mean = src.mean(axis=0) - dst_mean = dst.mean(axis=0) - - # Subtract mean from src and dst. - src_demean = src - src_mean - dst_demean = dst - dst_mean - - # Eq. (38). - A = np.dot(dst_demean.T, src_demean) / num - - # Eq. (39). - d = np.ones((dim,), dtype=np.double) - if np.linalg.det(A) < 0: - d[dim - 1] = -1 - - T = np.eye(dim + 1, dtype=np.double) - - U, S, V = np.linalg.svd(A) - - # Eq. (40) and (43). - rank = np.linalg.matrix_rank(A) - if rank == 0: - return np.nan * T - elif rank == dim - 1: - if np.linalg.det(U) * np.linalg.det(V) > 0: - T[:dim, :dim] = np.dot(U, V) - else: - s = d[dim - 1] - d[dim - 1] = -1 - T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) - d[dim - 1] = s - else: - T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T)) - - if estimate_scale: - # Eq. (41) and (42). - scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) - else: - scale = 1.0 - - T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) - T[:dim, :dim] *= scale - - return T +import numpy as np + +def umeyama(src, dst, estimate_scale): + """Estimate N-D similarity transformation with or without scaling. + Parameters + ---------- + src : (M, N) array + Source coordinates. + dst : (M, N) array + Destination coordinates. + estimate_scale : bool + Whether to estimate scaling factor. + Returns + ------- + T : (N + 1, N + 1) + The homogeneous similarity transformation matrix. The matrix contains + NaN values only if the problem is not well-conditioned. + References + ---------- + .. [1] "Least-squares estimation of transformation parameters between two + point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 + """ + + num = src.shape[0] + dim = src.shape[1] + + # Compute mean of src and dst. + src_mean = src.mean(axis=0) + dst_mean = dst.mean(axis=0) + + # Subtract mean from src and dst. + src_demean = src - src_mean + dst_demean = dst - dst_mean + + # Eq. (38). + A = np.dot(dst_demean.T, src_demean) / num + + # Eq. (39). + d = np.ones((dim,), dtype=np.double) + if np.linalg.det(A) < 0: + d[dim - 1] = -1 + + T = np.eye(dim + 1, dtype=np.double) + + U, S, V = np.linalg.svd(A) + + # Eq. (40) and (43). + rank = np.linalg.matrix_rank(A) + if rank == 0: + return np.nan * T + elif rank == dim - 1: + if np.linalg.det(U) * np.linalg.det(V) > 0: + T[:dim, :dim] = np.dot(U, V) + else: + s = d[dim - 1] + d[dim - 1] = -1 + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) + d[dim - 1] = s + else: + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T)) + + if estimate_scale: + # Eq. (41) and (42). 
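A quick numeric sanity check for the scale recovered by Eq. (41) and (42); a sketch assuming well-conditioned random input, not part of the patch:

    import numpy as np
    from mathlib.umeyama import umeyama

    pts = np.random.rand(16, 2)                       # arbitrary 2-D points
    T = umeyama(pts, 3.0 * pts, estimate_scale=True)  # dst is src scaled by 3
    # With no rotation or translation involved, T[:2, :2] should be ~3 * I
    # and the translation column ~0.
    assert np.allclose(T[:2, :2], 3.0 * np.eye(2), atol=1e-6)
    assert np.allclose(T[:2, 2], 0.0, atol=1e-6)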
+ scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) + else: + scale = 1.0 + + T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) + T[:dim, :dim] *= scale + + return T diff --git a/models/Model_DEV_FANSEG/Model.py b/models/Model_DEV_FANSEG/Model.py index c095b27..5ccb320 100644 --- a/models/Model_DEV_FANSEG/Model.py +++ b/models/Model_DEV_FANSEG/Model.py @@ -1,102 +1,102 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from facelib import FANSegmentator -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_sort_by_yaw=False, - ask_random_flip=False, - ask_src_scale_mod=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4} ) - - self.resolution = 256 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - - self.fan_seg = FANSegmentator(self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True), - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution, 'motion_blur':(25, 1) }, - { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_M), 'resolution': self.resolution }, - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True ), - output_sample_types=[ { 'types': (t.IMG_TRANSFORMED , face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution}, - ]) - ]) - - #override - def onSave(self): - self.fan_seg.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_src, target_src_mask = generators_samples[0] - - loss = self.fan_seg.train_on_batch( [target_src], [target_src_mask] ) - - return ( ('loss', loss), ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][0][0:4] #first 4 samples - test_B = sample[1][0][0:4] #first 4 samples - - mAA = self.fan_seg.extract(test_A) - mBB = self.fan_seg.extract(test_B) - - mAA = np.repeat ( mAA, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - mAA[i], - test_A[i,:,:,0:3]*mAA[i], - ), axis=1) ) - - st2 = [] - for i in range(0, len(test_B)): - st2.append ( np.concatenate ( ( - test_B[i,:,:,0:3], - mBB[i], - test_B[i,:,:,0:3]*mBB[i], - ), axis=1) ) - - return [ ('training data', np.concatenate ( st, axis=0 ) ), - 
('evaluating data', np.concatenate ( st2, axis=0 ) ), - ] +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from facelib import FANSegmentator +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, + ask_enable_autobackup=False, + ask_write_preview_history=False, + ask_target_iter=False, + ask_sort_by_yaw=False, + ask_random_flip=False, + ask_src_scale_mod=False) + + #override + def onInitializeOptions(self, is_first_run, ask_override): + default_face_type = 'f' + if is_first_run: + self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="").lower() + else: + self.options['face_type'] = self.options.get('face_type', default_face_type) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {1.5:4} ) + + self.resolution = 256 + self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + + self.fan_seg = FANSegmentator(self.resolution, + FaceType.toString(self.face_type), + load_weights=not self.is_first_run(), + weights_file_root=self.get_model_root_path(), + training=True) + + if self.is_training_mode: + t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=True), + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution, 'motion_blur':(25, 1) }, + { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_M), 'resolution': self.resolution }, + ]), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=True ), + output_sample_types=[ { 'types': (t.IMG_TRANSFORMED , face_type, t.MODE_BGR_SHUFFLE), 'resolution' : self.resolution}, + ]) + ]) + + #override + def onSave(self): + self.fan_seg.save_weights() + + #override + def onTrainOneIter(self, generators_samples, generators_list): + target_src, target_src_mask = generators_samples[0] + + loss = self.fan_seg.train_on_batch( [target_src], [target_src_mask] ) + + return ( ('loss', loss), ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][0][0:4] #first 4 samples + test_B = sample[1][0][0:4] #first 4 samples + + mAA = self.fan_seg.extract(test_A) + mBB = self.fan_seg.extract(test_B) + + mAA = np.repeat ( mAA, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + mAA[i], + test_A[i,:,:,0:3]*mAA[i], + ), axis=1) ) + + st2 = [] + for i in range(0, len(test_B)): + st2.append ( np.concatenate ( ( + test_B[i,:,:,0:3], + mBB[i], + test_B[i,:,:,0:3]*mBB[i], + ), axis=1) ) + + return [ ('training data', np.concatenate ( st, axis=0 ) ), + ('evaluating data', np.concatenate ( st2, axis=0 ) ), + ] diff --git a/models/Model_DEV_FANSEG/__init__.py b/models/Model_DEV_FANSEG/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_DEV_FANSEG/__init__.py +++ b/models/Model_DEV_FANSEG/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git 
a/models/Model_DEV_POSEEST/Model.py b/models/Model_DEV_POSEEST/Model.py index cf8bb7f..ee2b18f 100644 --- a/models/Model_DEV_POSEEST/Model.py +++ b/models/Model_DEV_POSEEST/Model.py @@ -1,122 +1,122 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from facelib import PoseEstimator -from samplelib import * -from interact import interact as io -import imagelib - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_sort_by_yaw=False, - ask_random_flip=False, - ask_src_scale_mod=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - def_train_bgr = self.options.get('train_bgr', True) - if is_first_run or ask_override: - self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr) - else: - self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4:64} ) - - self.resolution = 128 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - - self.pose_est = PoseEstimator(self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) }, - {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]) - ]) - - #override - def onSave(self): - self.pose_est.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_srcw, target_src, pitch_yaw_roll = generators_samples[0] - - bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] ) - - return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), ) - - #override - def onGetPreview(self, generators_samples): - test_src = generators_samples[0][1][0:4] #first 4 samples - test_pyr_src = generators_samples[0][2][0:4] - test_dst = 
generators_samples[1][0][0:4] - test_pyr_dst = generators_samples[1][1][0:4] - - h,w,c = self.resolution,self.resolution,3 - h_line = 13 - - result = [] - for name, img, pyr in [ ['training data', test_src, test_pyr_src], \ - ['evaluating data',test_dst, test_pyr_dst] ]: - bgr_pred, pyr_pred = self.pose_est.extract(img) - - hor_imgs = [] - for i in range(len(img)): - img_info = np.ones ( (h,w,c) ) * 0.1 - - i_pyr = pyr[i] - i_pyr_pred = pyr_pred[i] - lines = ["%.4f %.4f %.4f" % (i_pyr[0],i_pyr[1],i_pyr[2]), - "%.4f %.4f %.4f" % (i_pyr_pred[0],i_pyr_pred[1],i_pyr_pred[2]) ] - - lines_count = len(lines) - for ln in range(lines_count): - img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \ - imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c ) - - hor_imgs.append ( np.concatenate ( ( - img[i,:,:,0:3], - bgr_pred[i], - img_info - ), axis=1) ) - - - result += [ (name, np.concatenate (hor_imgs, axis=0)) ] - +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from facelib import PoseEstimator +from samplelib import * +from interact import interact as io +import imagelib + +class Model(ModelBase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, + ask_enable_autobackup=False, + ask_write_preview_history=False, + ask_target_iter=False, + ask_sort_by_yaw=False, + ask_random_flip=False, + ask_src_scale_mod=False) + + #override + def onInitializeOptions(self, is_first_run, ask_override): + yn_str = {True:'y',False:'n'} + + default_face_type = 'f' + if is_first_run: + self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() + else: + self.options['face_type'] = self.options.get('face_type', default_face_type) + + def_train_bgr = self.options.get('train_bgr', True) + if is_first_run or ask_override: + self.options['train_bgr'] = io.input_bool ("Train bgr? 
(y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr) + else: + self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {4:64} ) + + self.resolution = 128 + self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + + self.pose_est = PoseEstimator(self.resolution, + FaceType.toString(self.face_type), + load_weights=not self.is_first_run(), + weights_file_root=self.get_model_root_path(), + training=True) + + if self.is_training_mode: + t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, + sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, + output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) }, + {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution }, + {'types': (t.IMG_PITCH_YAW_ROLL,)} + ]), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, + sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, + output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution }, + {'types': (t.IMG_PITCH_YAW_ROLL,)} + ]) + ]) + + #override + def onSave(self): + self.pose_est.save_weights() + + #override + def onTrainOneIter(self, generators_samples, generators_list): + target_srcw, target_src, pitch_yaw_roll = generators_samples[0] + + bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] ) + + return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), ) + + #override + def onGetPreview(self, generators_samples): + test_src = generators_samples[0][1][0:4] #first 4 samples + test_pyr_src = generators_samples[0][2][0:4] + test_dst = generators_samples[1][0][0:4] + test_pyr_dst = generators_samples[1][1][0:4] + + h,w,c = self.resolution,self.resolution,3 + h_line = 13 + + result = [] + for name, img, pyr in [ ['training data', test_src, test_pyr_src], \ + ['evaluating data',test_dst, test_pyr_dst] ]: + bgr_pred, pyr_pred = self.pose_est.extract(img) + + hor_imgs = [] + for i in range(len(img)): + img_info = np.ones ( (h,w,c) ) * 0.1 + + i_pyr = pyr[i] + i_pyr_pred = pyr_pred[i] + lines = ["%.4f %.4f %.4f" % (i_pyr[0],i_pyr[1],i_pyr[2]), + "%.4f %.4f %.4f" % (i_pyr_pred[0],i_pyr_pred[1],i_pyr_pred[2]) ] + + lines_count = len(lines) + for ln in range(lines_count): + img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \ + imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c ) + + hor_imgs.append ( np.concatenate ( ( + img[i,:,:,0:3], + bgr_pred[i], + img_info + ), axis=1) ) + + + result += [ (name, np.concatenate (hor_imgs, axis=0)) ] + return result \ No newline at end of file diff --git a/models/Model_DEV_POSEEST/__init__.py b/models/Model_DEV_POSEEST/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_DEV_POSEEST/__init__.py +++ b/models/Model_DEV_POSEEST/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_DF/Model.py b/models/Model_DF/Model.py 
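
The Model_DF changes that follow, like the H128, H64, and LIAEF128 models after them, are built from the same two convolutional helpers: downscale (a stride-2 5x5 Conv2D followed by LeakyReLU(0.1), halving the feature map) and upscale (a 3x3 Conv2D to dim*4 channels followed by a PixelShuffler, which rearranges those channels into a feature map of twice the height and width). A minimal standalone sketch of the pair, assuming plain tf.keras and using tf.nn.depth_to_space as a stand-in for nnlib's PixelShuffler layer:

import tensorflow as tf
from tensorflow.keras import layers

def downscale(dim):
    def func(x):
        # stride-2 conv halves H and W; LeakyReLU(0.1) as in the patch
        return layers.LeakyReLU(0.1)(layers.Conv2D(dim, 5, strides=2, padding='same')(x))
    return func

def upscale(dim):
    def func(x):
        # conv to dim*4 channels, then pixel-shuffle trades channels for 2x resolution
        x = layers.LeakyReLU(0.1)(layers.Conv2D(dim * 4, 3, strides=1, padding='same')(x))
        return layers.Lambda(lambda t: tf.nn.depth_to_space(t, 2))(x)
    return func

inp = layers.Input((64, 64, 3))
x = downscale(128)(inp)   # -> (None, 32, 32, 128)
x = upscale(128)(x)       # -> (None, 64, 64, 128)
toy = tf.keras.Model(inp, x)

Sub-pixel (pixel-shuffle) upsampling is generally preferred over stride-2 transposed convolutions because it avoids checkerboard artifacts; nnlib's PixelShuffler may order channels differently from depth_to_space, but the two are equivalent up to a channel permutation.
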
index 92d5cbe..fb98418 100644 --- a/models/Model_DF/Model.py +++ b/models/Model_DF/Model.py @@ -1,171 +1,171 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder_src, self.decoder_dst = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - rec_src = self.decoder_src(self.encoder(ae_input_layer)) - rec_dst = self.decoder_dst(self.encoder(ae_input_layer)) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer], rec_src) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = 
self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('DF', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.convert ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=128, - face_type=FaceType.FULL, - base_erode_mask_modifier=30, - base_blur_mask_modifier=0) - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_layer): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return Encoder(input_layer), Decoder(), Decoder() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {4.5:4} ) + + ae_input_layer = Input(shape=(128, 128, 3)) + mask_layer = Input(shape=(128, 128, 1)) #same as output + + self.encoder, self.decoder_src, self.decoder_dst = self.Build(ae_input_layer) + + if not self.is_first_run(): + weights_to_load = [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + self.load_weights_safe(weights_to_load) + + rec_src = self.decoder_src(self.encoder(ae_input_layer)) + rec_dst = self.decoder_dst(self.encoder(ae_input_layer)) + self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src) + self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst) + + self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + + self.convert = K.function([ae_input_layer], rec_src) + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in 
range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('DF', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.convert ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + face_type=FaceType.FULL, + base_erode_mask_modifier=30, + base_blur_mask_modifier=0) + + def Build(self, input_layer): + exec(nnlib.code_import_all, locals(), globals()) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(input_layer): + x = input_layer + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + + x = Dense(512)(Flatten()(x)) + x = Dense(8 * 8 * 512)(x) + x = Reshape((8, 8, 512))(x) + x = upscale(512)(x) + + return Model(input_layer, x) + + def Decoder(): + input_ = Input(shape=(16, 16, 512)) + x = input_ + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + + y = input_ #mask decoder + y = upscale(512)(y) + y = upscale(256)(y) + y = upscale(128)(y) + + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + return Model(input_, [x,y]) + + return Encoder(input_layer), Decoder(), Decoder() diff --git a/models/Model_DF/__init__.py b/models/Model_DF/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_DF/__init__.py +++ b/models/Model_DF/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_H128/Model.py b/models/Model_H128/Model.py index a50b930..903014b 100644 --- a/models/Model_H128/Model.py +++ b/models/Model_H128/Model.py @@ -1,205 +1,205 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {2.5:4} ) - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] ) - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), - loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types ), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types ) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_mask, warped_dst, target_dst_mask], [target_src, target_src_mask, target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) 
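# Note on the three np.repeat ( .., (3,), -1) calls just above: they tile a
# single-channel mask prediction to three channels so it can be concatenated
# next to BGR images in the preview strip. A tiny self-contained illustration
# on dummy data (shapes assume the 128px preview path; not taken from the patch):
import numpy as np

mask = np.random.rand(4, 128, 128, 1).astype(np.float32)  # (batch, H, W, 1) masks
mask3 = np.repeat(mask, (3,), -1)                         # -> (batch, H, W, 3)
assert mask3.shape == (4, 128, 128, 3)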
- - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=128, - face_type=FaceType.HALF, - base_erode_mask_modifier=100, - base_blur_mask_modifier=100) - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (128, 128, 3) - mask_shape = (128, 128, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(256)(Flatten()(x)) - x = Dense(8 * 8 * 256)(x) - x = Reshape((8, 8, 256))(x) - x = upscale(256)(x) - - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - else: - input_ = Input(shape=(16, 16, 256)) - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run: + self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.") + else: + default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future + if 'created_vram_gb' in self.options.keys(): + self.options.pop ('created_vram_gb') + self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) + + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {2.5:4} ) + + bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] ) + if not self.is_first_run(): + weights_to_load = [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + self.load_weights_safe(weights_to_load) + + input_src_bgr = Input(bgr_shape) + input_src_mask = Input(mask_shape) + input_dst_bgr = Input(bgr_shape) + input_dst_mask = Input(mask_shape) + + rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) + rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) + + self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) + + self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), + loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) + + self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) + self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':128} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types ), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types ) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_mask, warped_dst, target_dst_mask], [target_src, target_src_mask, target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.src_view([test_A]) + AB, mAB = self.src_view([test_B]) + BB, mBB = self.dst_view([test_B]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) 
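# The loop just below assembles the preview grid: for each of the first four
# samples a row is formed by concatenating source, reconstruction, and swap
# images horizontally (axis=1), and the rows are then stacked vertically
# (axis=0) into a single image. The same pattern on dummy data (illustrative
# names, not from the patch):
import numpy as np

test_A = np.random.rand(4, 128, 128, 3)  # stand-ins for the preview tensors
AA = np.random.rand(4, 128, 128, 3)
AB = np.random.rand(4, 128, 128, 3)

rows = [np.concatenate((test_A[i], AA[i], AB[i]), axis=1) for i in range(len(test_A))]
grid = np.concatenate(rows, axis=0)      # (4*128, 3*128, 3) preview image
assert grid.shape == (512, 384, 3)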
+ + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('H128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.src_view ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + face_type=FaceType.HALF, + base_erode_mask_modifier=100, + base_blur_mask_modifier=100) + + def Build(self, lighter_ae): + exec(nnlib.code_import_all, locals(), globals()) + + bgr_shape = (128, 128, 3) + mask_shape = (128, 128, 1) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(input_shape): + input_layer = Input(input_shape) + x = input_layer + if not lighter_ae: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Dense(512)(Flatten()(x)) + x = Dense(8 * 8 * 512)(x) + x = Reshape((8, 8, 512))(x) + x = upscale(512)(x) + else: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Dense(256)(Flatten()(x)) + x = Dense(8 * 8 * 256)(x) + x = Reshape((8, 8, 256))(x) + x = upscale(256)(x) + + return Model(input_layer, x) + + def Decoder(): + if not lighter_ae: + input_ = Input(shape=(16, 16, 512)) + x = input_ + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + + y = input_ #mask decoder + y = upscale(512)(y) + y = upscale(256)(y) + y = upscale(128)(y) + else: + input_ = Input(shape=(16, 16, 256)) + x = input_ + x = upscale(256)(x) + x = upscale(128)(x) + x = upscale(64)(x) + + y = input_ #mask decoder + y = upscale(256)(y) + y = upscale(128)(y) + y = upscale(64)(y) + + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + + return Model(input_, [x,y]) + + return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H128/__init__.py b/models/Model_H128/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_H128/__init__.py +++ b/models/Model_H128/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_H64/Model.py b/models/Model_H64/Model.py index f07bd05..76fa5ff 100644 --- a/models/Model_H64/Model.py +++ b/models/Model_H64/Model.py @@ -1,202 +1,202 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. 
If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4} ) - - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae']) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':64} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_full_mask = sample[0] - warped_dst, target_dst, target_dst_full_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, 
loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], [target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H64', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=64, - face_type=FaceType.HALF, - base_erode_mask_modifier=100, - base_blur_mask_modifier=100) - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (64, 64, 3) - mask_shape = (64, 64, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(1024)(Flatten()(x)) - x = Dense(4 * 4 * 1024)(x) - x = Reshape((4, 4, 1024))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(768)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(4 * 4 * 512)(x) - x = Reshape((4, 4, 512))(x) - x = upscale(256)(x) - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(8, 8, 512)) - x = input_ - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - else: - input_ = Input(shape=(8, 8, 256)) - - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run: + self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. 
If your GPU VRAM <= 4, you should to choose this option.") + else: + default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future + if 'created_vram_gb' in self.options.keys(): + self.options.pop ('created_vram_gb') + self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) + + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {1.5:4} ) + + + bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae']) + + if not self.is_first_run(): + weights_to_load = [ [self.encoder , 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + self.load_weights_safe(weights_to_load) + + input_src_bgr = Input(bgr_shape) + input_src_mask = Input(mask_shape) + input_dst_bgr = Input(bgr_shape) + input_dst_mask = Input(mask_shape) + + rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) + rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) + + self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) + + self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) + + self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) + self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':64} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_full_mask = sample[0] + warped_dst, target_dst, target_dst_full_mask = sample[1] + + total, loss_src_bgr, loss_src_mask, 
loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], [target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) + + return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.src_view([test_A]) + AB, mAB = self.src_view([test_B]) + BB, mBB = self.dst_view([test_B]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('H64', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.src_view ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=64, + face_type=FaceType.HALF, + base_erode_mask_modifier=100, + base_blur_mask_modifier=100) + + def Build(self, lighter_ae): + exec(nnlib.code_import_all, locals(), globals()) + + bgr_shape = (64, 64, 3) + mask_shape = (64, 64, 1) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(input_shape): + input_layer = Input(input_shape) + x = input_layer + if not lighter_ae: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Dense(1024)(Flatten()(x)) + x = Dense(4 * 4 * 1024)(x) + x = Reshape((4, 4, 1024))(x) + x = upscale(512)(x) + else: + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(768)(x) + x = Dense(512)(Flatten()(x)) + x = Dense(4 * 4 * 512)(x) + x = Reshape((4, 4, 512))(x) + x = upscale(256)(x) + return Model(input_layer, x) + + def Decoder(): + if not lighter_ae: + input_ = Input(shape=(8, 8, 512)) + x = input_ + + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + + else: + input_ = Input(shape=(8, 8, 256)) + + x = input_ + x = upscale(256)(x) + x = upscale(128)(x) + x = upscale(64)(x) + + y = input_ #mask decoder + y = upscale(256)(y) + y = upscale(128)(y) + y = upscale(64)(y) + + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + return Model(input_, [x,y]) + + return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H64/__init__.py b/models/Model_H64/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_H64/__init__.py +++ b/models/Model_H64/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_LIAEF128/Model.py b/models/Model_LIAEF128/Model.py index 36f5658..5fc03af 100644 --- a/models/Model_LIAEF128/Model.py +++ b/models/Model_LIAEF128/Model.py @@ -1,180 +1,180 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def 
onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5'] - ] - self.load_weights_safe(weights_to_load) - - code = self.encoder(ae_input_layer) - AB = self.inter_AB(code) - B = self.inter_B(code) - rec_src = self.decoder(Concatenate()([AB, AB])) - rec_dst = self.decoder(Concatenate()([B, AB])) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src ) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst ) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer],rec_src) - - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = 
sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - x, mx = self.convert ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_converter(self): - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=128, - face_type=FaceType.FULL, - base_erode_mask_modifier=30, - base_blur_mask_modifier=0) - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Flatten()(x) - return Model(input_layer, x) - - def Intermediate(): - input_layer = Input(shape=(None, 8 * 8 * 1024)) - x = input_layer - x = Dense(256)(x) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 1024)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) - - return Model(input_, [x,y]) - - return Encoder(), Decoder(), Intermediate(), Intermediate() +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +class Model(ModelBase): + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run or ask_override: + def_pixel_loss = self.options.get('pixel_loss', False) + self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") + else: + self.options['pixel_loss'] = self.options.get('pixel_loss', False) + + #override + def onInitialize(self): + exec(nnlib.import_all(), locals(), globals()) + self.set_vram_batch_requirements( {4.5:4} ) + + ae_input_layer = Input(shape=(128, 128, 3)) + mask_layer = Input(shape=(128, 128, 1)) #same as output + + self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) + + if not self.is_first_run(): + weights_to_load = [ [self.encoder, 'encoder.h5'], + [self.decoder, 'decoder.h5'], + [self.inter_B, 'inter_B.h5'], + [self.inter_AB, 'inter_AB.h5'] + ] + self.load_weights_safe(weights_to_load) + + code = self.encoder(ae_input_layer) + AB = self.inter_AB(code) + B = self.inter_B(code) + rec_src = self.decoder(Concatenate()([AB, AB])) + rec_dst = self.decoder(Concatenate()([B, AB])) + self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src ) + self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst ) + + self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) + + self.convert = K.function([ae_input_layer],rec_src) + + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, + { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types=output_sample_types), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types=output_sample_types) + ]) + + #override + def get_model_filename_list(self): + return [[self.encoder, 'encoder.h5'], + [self.decoder, 'decoder.h5'], + [self.inter_B, 'inter_B.h5'], + [self.inter_AB, 'inter_AB.h5']] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, sample, generators_list): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat 
( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + x, mx = self.convert ( [ face[np.newaxis,...] ] ) + return x[0], mx[0][...,0] + + #override + def get_converter(self): + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + face_type=FaceType.FULL, + base_erode_mask_modifier=30, + base_blur_mask_modifier=0) + + def Build(self, input_layer): + exec(nnlib.code_import_all, locals(), globals()) + + def downscale (dim): + def func(x): + return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) + return func + + def upscale (dim): + def func(x): + return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) + return func + + def Encoder(): + x = input_layer + x = downscale(128)(x) + x = downscale(256)(x) + x = downscale(512)(x) + x = downscale(1024)(x) + x = Flatten()(x) + return Model(input_layer, x) + + def Intermediate(): + input_layer = Input(shape=(None, 8 * 8 * 1024)) + x = input_layer + x = Dense(256)(x) + x = Dense(8 * 8 * 512)(x) + x = Reshape((8, 8, 512))(x) + x = upscale(512)(x) + return Model(input_layer, x) + + def Decoder(): + input_ = Input(shape=(16, 16, 1024)) + x = input_ + x = upscale(512)(x) + x = upscale(256)(x) + x = upscale(128)(x) + x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + + y = input_ #mask decoder + y = upscale(512)(y) + y = upscale(256)(y) + y = upscale(128)(y) + y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) + + return Model(input_, [x,y]) + + return Encoder(), Decoder(), Intermediate(), Intermediate() diff --git a/models/Model_LIAEF128/__init__.py b/models/Model_LIAEF128/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_LIAEF128/__init__.py +++ b/models/Model_LIAEF128/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/Model_RecycleGAN/Model.py b/models/Model_RecycleGAN/Model.py index b78aece..6f9d1c8 100644 --- a/models/Model_RecycleGAN/Model.py +++ b/models/Model_RecycleGAN/Model.py @@ -1,483 +1,483 @@ -from functools import partial - -import cv2 -import numpy as np - -from facelib import FaceType -from interact import interact as io -from mathlib import get_power_of_two -from models import ModelBase -from nnlib import nnlib -from samplelib import * - -class RecycleGANModel(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_sort_by_yaw=False, - ask_random_flip=False, - ask_src_scale_mod=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['resolution'] = io.input_int("Resolution ( 128,256 ?:help skip:128) : ", 128, [128,256], help_message="More resolution requires more VRAM and time to train. 
Value will be adjusted to multiple of 16.") - else: - self.options['resolution'] = self.options.get('resolution', 128) - - #override - def onInitialize(self, batch_size=-1, **in_options): - exec(nnlib.code_import_all, locals(), globals()) - self.set_vram_batch_requirements({6:16}) - - resolution = self.options['resolution'] - bgr_shape = (resolution, resolution, 3) - ngf = 64 - npf = 32 - ndf = 64 - lambda_A = 10 - lambda_B = 10 - - use_batch_norm = True #created_batch_size > 1 - self.GA = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - self.GB = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - - #self.GA = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - #self.GB = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) - - self.PA = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) - self.PB = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) - - self.DA = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) - self.DB = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) - - if not self.is_first_run(): - weights_to_load = [ - (self.GA, 'GA.h5'), - (self.DA, 'DA.h5'), - (self.PA, 'PA.h5'), - (self.GB, 'GB.h5'), - (self.DB, 'DB.h5'), - (self.PB, 'PB.h5'), - ] - self.load_weights_safe(weights_to_load) - - real_A0 = Input(bgr_shape, name="real_A0") - real_A1 = Input(bgr_shape, name="real_A1") - real_A2 = Input(bgr_shape, name="real_A2") - - real_B0 = Input(bgr_shape, name="real_B0") - real_B1 = Input(bgr_shape, name="real_B1") - real_B2 = Input(bgr_shape, name="real_B2") - - DA_ones = K.ones_like ( K.shape(self.DA.outputs[0]) ) - DA_zeros = K.zeros_like ( K.shape(self.DA.outputs[0] )) - DB_ones = K.ones_like ( K.shape(self.DB.outputs[0] )) - DB_zeros = K.zeros_like ( K.shape(self.DB.outputs[0] )) - - def DLoss(labels,logits): - return K.mean(K.binary_crossentropy(labels,logits)) - - def CycleLoss (t1,t2): - return K.mean(K.abs(t1 - t2)) - - def RecurrentLOSS(t1,t2): - return K.mean(K.abs(t1 - t2)) - - def RecycleLOSS(t1,t2): - return K.mean(K.abs(t1 - t2)) - - fake_B0 = self.GA(real_A0) - fake_B1 = self.GA(real_A1) - - fake_A0 = self.GB(real_B0) - fake_A1 = self.GB(real_B1) - - real_A0_d = self.DA(real_A0) - real_A0_d_ones = K.ones_like(real_A0_d) - real_A1_d = self.DA(real_A1) - real_A1_d_ones = K.ones_like(real_A1_d) - - fake_A0_d = self.DA(fake_A0) - fake_A0_d_ones = K.ones_like(fake_A0_d) - fake_A0_d_zeros = K.zeros_like(fake_A0_d) - - fake_A1_d = self.DA(fake_A1) - fake_A1_d_ones = K.ones_like(fake_A1_d) - fake_A1_d_zeros = K.zeros_like(fake_A1_d) - - real_B0_d = self.DB(real_B0) - real_B0_d_ones = K.ones_like(real_B0_d) - - real_B1_d = self.DB(real_B1) - real_B1_d_ones = K.ones_like(real_B1_d) - - fake_B0_d = self.DB(fake_B0) - fake_B0_d_ones = K.ones_like(fake_B0_d) - fake_B0_d_zeros = K.zeros_like(fake_B0_d) - - fake_B1_d = self.DB(fake_B1) - fake_B1_d_ones = K.ones_like(fake_B1_d) - fake_B1_d_zeros = K.zeros_like(fake_B1_d) - - pred_A2 = self.PA ( [real_A0, real_A1]) - pred_B2 = self.PB ( [real_B0, real_B1]) - rec_A2 = self.GB ( self.PB ( [fake_B0, fake_B1]) ) - rec_B2 = self.GA ( 
self.PA ( [fake_A0, fake_A1])) - - - loss_GA = DLoss(fake_B0_d_ones, fake_B0_d ) + \ - DLoss(fake_B1_d_ones, fake_B1_d ) + \ - lambda_A * (RecurrentLOSS(pred_A2, real_A2) + \ - RecycleLOSS(rec_B2, real_B2) ) - - - weights_GA = self.GA.trainable_weights + self.PA.trainable_weights - - loss_GB = DLoss(fake_A0_d_ones, fake_A0_d ) + \ - DLoss(fake_A1_d_ones, fake_A1_d ) + \ - lambda_B * (RecurrentLOSS(pred_B2, real_B2) + \ - RecycleLOSS(rec_A2, real_A2) ) - - weights_GB = self.GB.trainable_weights + self.PB.trainable_weights - - def opt(): - return Adam(lr=2e-4, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2)#, clipnorm=1) - - self.GA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GA], - opt().get_updates(loss_GA, weights_GA) ) - - self.GB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GB], - opt().get_updates(loss_GB, weights_GB) ) - - ########### - - loss_D_A0 = ( DLoss(real_A0_d_ones, real_A0_d ) + \ - DLoss(fake_A0_d_zeros, fake_A0_d ) ) * 0.5 - - loss_D_A1 = ( DLoss(real_A1_d_ones, real_A1_d ) + \ - DLoss(fake_A1_d_zeros, fake_A1_d ) ) * 0.5 - - loss_D_A = loss_D_A0 + loss_D_A1 - - self.DA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_A], - opt().get_updates(loss_D_A, self.DA.trainable_weights) ) - - ############ - - loss_D_B0 = ( DLoss(real_B0_d_ones, real_B0_d ) + \ - DLoss(fake_B0_d_zeros, fake_B0_d ) ) * 0.5 - - loss_D_B1 = ( DLoss(real_B1_d_ones, real_B1_d ) + \ - DLoss(fake_B1_d_zeros, fake_B1_d ) ) * 0.5 - - loss_D_B = loss_D_B0 + loss_D_B1 - - self.DB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_B], - opt().get_updates(loss_D_B, self.DB.trainable_weights) ) - - ############ - - - self.G_view = K.function([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 ]) - - - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_SOURCE, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh' : True} ] - - self.set_training_data_generators ([ - SampleGeneratorImageTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip = False), - output_sample_types=output_sample_types ), - - SampleGeneratorImageTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip = False), - output_sample_types=output_sample_types ), - ]) - else: - self.G_convert = K.function([real_B0],[fake_A0]) - - #override - def get_model_filename_list(self): - return [ [self.GA, 'GA.h5'], - [self.GB, 'GB.h5'], - [self.DA, 'DA.h5'], - [self.DB, 'DB.h5'], - [self.PA, 'PA.h5'], - [self.PB, 'PB.h5'] ] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - source_src_0, source_src_1, source_src_2, = generators_samples[0] - source_dst_0, source_dst_1, source_dst_2, = generators_samples[1] - - feed = [source_src_0, source_src_1, source_src_2, source_dst_0, source_dst_1, source_dst_2] - - loss_GA, = self.GA_train ( feed ) - loss_GB, = self.GB_train ( feed ) - loss_DA, = self.DA_train( feed ) - loss_DB, = self.DB_train( feed ) - - return ( ('GA', loss_GA), ('GB', loss_GB), ('DA', loss_DA), ('DB', loss_DB) ) - - #override - def 
onGetPreview(self, sample): - test_A0 = sample[0][0] - test_A1 = sample[0][1] - test_A2 = sample[0][2] - - test_B0 = sample[1][0] - test_B1 = sample[1][1] - test_B2 = sample[1][2] - - G_view_result = self.G_view([test_A0, test_A1, test_A2, test_B0, test_B1, test_B2]) - - fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 = [ x[0] / 2 + 0.5 for x in G_view_result] - test_A0, test_A1, test_A2, test_B0, test_B1, test_B2 = [ x[0] / 2 + 0.5 for x in [test_A0, test_A1, test_A2, test_B0, test_B1, test_B2] ] - - r = np.concatenate ((np.concatenate ( (test_A0, test_A1, test_A2, pred_A2, fake_B0, fake_B1, rec_A2), axis=1), - np.concatenate ( (test_B0, test_B1, test_B2, pred_B2, fake_A0, fake_A1, rec_B2), axis=1) - ), axis=0) - - return [ ('RecycleGAN', r ) ] - - def predictor_func (self, face): - x = self.G_convert ( [ face[np.newaxis,...]*2-1 ] )[0] - return np.clip ( x[0] / 2 + 0.5 , 0, 1) - - #override - def get_converter(self, **in_options): - from converters import ConverterImage - return ConverterImage(self.predictor_func, - predictor_input_size=self.options['resolution'], - **in_options) - - @staticmethod - def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=-1)(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=-1)(x) - - XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) - XConv2DTranspose = partial(Conv2DTranspose, padding='same', use_bias=use_bias) - - def func(input): - - - def ResnetBlock(dim, use_dropout=False): - def func(input): - x = input - - x = XConv2D(dim, 3, strides=1)(x) - x = XNormalization(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = XConv2D(dim, 3, strides=1)(x) - x = XNormalization(x) - x = ReLU()(x) - return Add()([x,input]) - return func - - x = input - - x = ReLU()(XNormalization(XConv2D(ngf, 7, strides=1)(x))) - - x = ReLU()(XNormalization(XConv2D(ngf*2, 3, strides=2)(x))) - x = ReLU()(XNormalization(XConv2D(ngf*4, 3, strides=2)(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4, use_dropout=use_dropout)(x) - - x = ReLU()(XNormalization(XConv2DTranspose(ngf*2, 3, strides=2)(x))) - x = ReLU()(XNormalization(XConv2DTranspose(ngf , 3, strides=2)(x))) - - x = XConv2D(output_nc, 7, strides=1, activation='tanh', use_bias=True)(x) - - return x - - return func - - @staticmethod - def UNet(output_nc, use_batch_norm, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalizationL(): - return InstanceNormalization (axis=-1) - else: - use_bias = False - def XNormalizationL(): - return BatchNormalization (axis=-1) - - def XNormalization(x): - return XNormalizationL()(x) - - XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) - XConv2DTranspose = partial(Conv2DTranspose, padding='same', use_bias=use_bias) - - def func(input): - - b,h,w,c = K.int_shape(input) - - n_downs = get_power_of_two(w) - 4 - - Norm = XNormalizationL() - Norm2 = XNormalizationL() - Norm4 = XNormalizationL() - Norm8 = XNormalizationL() - - x = input - - x = e1 = XConv2D( ngf, 4, strides=2, use_bias=True ) (x) - - x = e2 = Norm2( XConv2D( ngf*2, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) - x = e3 = Norm4( XConv2D( ngf*4, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) - - l = [] - for i in range(n_downs): - x = Norm8( XConv2D( ngf*8, 4, strides=2 )( 
LeakyReLU(0.2)(x) ) ) - l += [x] - - x = XConv2D( ngf*8, 4, strides=2, use_bias=True )( LeakyReLU(0.2)(x) ) - - for i in range(n_downs): - x = Norm8( XConv2DTranspose( ngf*8, 4, strides=2 )( ReLU()(x) ) ) - if i <= n_downs-2: - x = Dropout(0.5)(x) - x = Concatenate(axis=-1)([x, l[-i-1] ]) - - x = Norm4( XConv2DTranspose( ngf*4, 4, strides=2 )( ReLU()(x) ) ) - x = Concatenate(axis=-1)([x, e3]) - - x = Norm2( XConv2DTranspose( ngf*2, 4, strides=2 )( ReLU()(x) ) ) - x = Concatenate(axis=-1)([x, e2]) - - x = Norm( XConv2DTranspose( ngf, 4, strides=2 )( ReLU()(x) ) ) - x = Concatenate(axis=-1)([x, e1]) - - x = XConv2DTranspose(output_nc, 4, strides=2, activation='tanh', use_bias=True)( ReLU()(x) ) - - return x - return func - nnlib.UNet = UNet - - @staticmethod - def UNetTemporalPredictor(output_nc, use_batch_norm, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - def func(inputs): - past_2_image_tensor, past_1_image_tensor = inputs - - x = Concatenate(axis=-1)([ past_2_image_tensor, past_1_image_tensor ]) - x = UNet(3, use_batch_norm, ngf=ngf, use_dropout=use_dropout) (x) - - return x - - return func - - @staticmethod - def PatchDiscriminator(ndf=64): - exec (nnlib.import_all(), locals(), globals()) - - #use_bias = True - #def XNormalization(x): - # return InstanceNormalization (axis=-1)(x) - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=-1)(x) - - XConv2D = partial(Conv2D, use_bias=use_bias) - - def func(input): - b,h,w,c = K.int_shape(input) - - x = input - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf, 4, strides=2, padding='valid', use_bias=True)(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*2, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*4, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# - return func - - @staticmethod - def NLayerDiscriminator(ndf=64, n_layers=3): - exec (nnlib.import_all(), locals(), globals()) - - #use_bias = True - #def XNormalization(x): - # return InstanceNormalization (axis=-1)(x) - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=-1)(x) - - XConv2D = partial(Conv2D, use_bias=use_bias) - - def func(input): - b,h,w,c = K.int_shape(input) - - x = input - - f = ndf - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( f, 4, strides=2, padding='valid', use_bias=True)(x) - f = min( ndf*8, f*2 ) - x = LeakyReLU(0.2)(x) - - for i in range(n_layers): - x = ZeroPadding2D((1,1))(x) - x = XConv2D( f, 4, strides=2, padding='valid')(x) - f = min( ndf*8, f*2 ) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = XConv2D( f, 4, strides=1, padding='valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# - return func - -Model = RecycleGANModel +from functools import partial + +import cv2 +import numpy as np + +from facelib import FaceType +from interact import interact as io +from mathlib import 
get_power_of_two +from models import ModelBase +from nnlib import nnlib +from samplelib import * + +class RecycleGANModel(ModelBase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, + ask_sort_by_yaw=False, + ask_random_flip=False, + ask_src_scale_mod=False) + + #override + def onInitializeOptions(self, is_first_run, ask_override): + if is_first_run: + self.options['resolution'] = io.input_int("Resolution ( 128,256 ?:help skip:128) : ", 128, [128,256], help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") + else: + self.options['resolution'] = self.options.get('resolution', 128) + + #override + def onInitialize(self, batch_size=-1, **in_options): + exec(nnlib.code_import_all, locals(), globals()) + self.set_vram_batch_requirements({6:16}) + + resolution = self.options['resolution'] + bgr_shape = (resolution, resolution, 3) + ngf = 64 + npf = 32 + ndf = 64 + lambda_A = 10 + lambda_B = 10 + + use_batch_norm = True #created_batch_size > 1 + self.GA = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + self.GB = modelify(RecycleGANModel.ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + + #self.GA = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + #self.GB = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + + self.PA = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) + self.PB = modelify(RecycleGANModel.UNetTemporalPredictor(bgr_shape[2], use_batch_norm, ngf=npf))([Input(bgr_shape), Input(bgr_shape)]) + + self.DA = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) + self.DB = modelify(RecycleGANModel.PatchDiscriminator(ndf=ndf) ) (Input(bgr_shape)) + + if not self.is_first_run(): + weights_to_load = [ + (self.GA, 'GA.h5'), + (self.DA, 'DA.h5'), + (self.PA, 'PA.h5'), + (self.GB, 'GB.h5'), + (self.DB, 'DB.h5'), + (self.PB, 'PB.h5'), + ] + self.load_weights_safe(weights_to_load) + + real_A0 = Input(bgr_shape, name="real_A0") + real_A1 = Input(bgr_shape, name="real_A1") + real_A2 = Input(bgr_shape, name="real_A2") + + real_B0 = Input(bgr_shape, name="real_B0") + real_B1 = Input(bgr_shape, name="real_B1") + real_B2 = Input(bgr_shape, name="real_B2") + + DA_ones = K.ones_like ( K.shape(self.DA.outputs[0]) ) + DA_zeros = K.zeros_like ( K.shape(self.DA.outputs[0] )) + DB_ones = K.ones_like ( K.shape(self.DB.outputs[0] )) + DB_zeros = K.zeros_like ( K.shape(self.DB.outputs[0] )) + + def DLoss(labels,logits): + return K.mean(K.binary_crossentropy(labels,logits)) + + def CycleLoss (t1,t2): + return K.mean(K.abs(t1 - t2)) + + def RecurrentLOSS(t1,t2): + return K.mean(K.abs(t1 - t2)) + + def RecycleLOSS(t1,t2): + return K.mean(K.abs(t1 - t2)) + + fake_B0 = self.GA(real_A0) + fake_B1 = self.GA(real_A1) + + fake_A0 = self.GB(real_B0) + fake_A1 = self.GB(real_B1) + + real_A0_d = self.DA(real_A0) + real_A0_d_ones = K.ones_like(real_A0_d) + real_A1_d = self.DA(real_A1) + real_A1_d_ones = K.ones_like(real_A1_d) + + fake_A0_d = self.DA(fake_A0) + fake_A0_d_ones = K.ones_like(fake_A0_d) + fake_A0_d_zeros = K.zeros_like(fake_A0_d) + + fake_A1_d = self.DA(fake_A1) + fake_A1_d_ones = K.ones_like(fake_A1_d) + fake_A1_d_zeros = K.zeros_like(fake_A1_d) 
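+ # The ones_like/zeros_like tensors built here are per-patch "real"/"fake" label maps
+ # for each discriminator output; DLoss below pairs each label tensor with the matching logits.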
+ + real_B0_d = self.DB(real_B0) + real_B0_d_ones = K.ones_like(real_B0_d) + + real_B1_d = self.DB(real_B1) + real_B1_d_ones = K.ones_like(real_B1_d) + + fake_B0_d = self.DB(fake_B0) + fake_B0_d_ones = K.ones_like(fake_B0_d) + fake_B0_d_zeros = K.zeros_like(fake_B0_d) + + fake_B1_d = self.DB(fake_B1) + fake_B1_d_ones = K.ones_like(fake_B1_d) + fake_B1_d_zeros = K.zeros_like(fake_B1_d) + + pred_A2 = self.PA ( [real_A0, real_A1]) + pred_B2 = self.PB ( [real_B0, real_B1]) + rec_A2 = self.GB ( self.PB ( [fake_B0, fake_B1]) ) + rec_B2 = self.GA ( self.PA ( [fake_A0, fake_A1])) + + + loss_GA = DLoss(fake_B0_d_ones, fake_B0_d ) + \ + DLoss(fake_B1_d_ones, fake_B1_d ) + \ + lambda_A * (RecurrentLOSS(pred_A2, real_A2) + \ + RecycleLOSS(rec_B2, real_B2) ) + + + weights_GA = self.GA.trainable_weights + self.PA.trainable_weights + + loss_GB = DLoss(fake_A0_d_ones, fake_A0_d ) + \ + DLoss(fake_A1_d_ones, fake_A1_d ) + \ + lambda_B * (RecurrentLOSS(pred_B2, real_B2) + \ + RecycleLOSS(rec_A2, real_A2) ) + + weights_GB = self.GB.trainable_weights + self.PB.trainable_weights + + def opt(): + return Adam(lr=2e-4, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2)#, clipnorm=1) + + self.GA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GA], + opt().get_updates(loss_GA, weights_GA) ) + + self.GB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_GB], + opt().get_updates(loss_GB, weights_GB) ) + + ########### + + loss_D_A0 = ( DLoss(real_A0_d_ones, real_A0_d ) + \ + DLoss(fake_A0_d_zeros, fake_A0_d ) ) * 0.5 + + loss_D_A1 = ( DLoss(real_A1_d_ones, real_A1_d ) + \ + DLoss(fake_A1_d_zeros, fake_A1_d ) ) * 0.5 + + loss_D_A = loss_D_A0 + loss_D_A1 + + self.DA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_A], + opt().get_updates(loss_D_A, self.DA.trainable_weights) ) + + ############ + + loss_D_B0 = ( DLoss(real_B0_d_ones, real_B0_d ) + \ + DLoss(fake_B0_d_zeros, fake_B0_d ) ) * 0.5 + + loss_D_B1 = ( DLoss(real_B1_d_ones, real_B1_d ) + \ + DLoss(fake_B1_d_zeros, fake_B1_d ) ) * 0.5 + + loss_D_B = loss_D_B0 + loss_D_B1 + + self.DB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_B], + opt().get_updates(loss_D_B, self.DB.trainable_weights) ) + + ############ + + + self.G_view = K.function([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 ]) + + + + if self.is_training_mode: + t = SampleProcessor.Types + output_sample_types=[ { 'types': (t.IMG_SOURCE, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh' : True} ] + + self.set_training_data_generators ([ + SampleGeneratorImageTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + temporal_image_count=3, + sample_process_options=SampleProcessor.Options(random_flip = False), + output_sample_types=output_sample_types ), + + SampleGeneratorImageTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + temporal_image_count=3, + sample_process_options=SampleProcessor.Options(random_flip = False), + output_sample_types=output_sample_types ), + ]) + else: + self.G_convert = K.function([real_B0],[fake_A0]) + + #override + def get_model_filename_list(self): + return [ [self.GA, 'GA.h5'], + [self.GB, 'GB.h5'], + [self.DA, 'DA.h5'], + [self.DB, 'DB.h5'], + [self.PA, 'PA.h5'], + [self.PB, 'PB.h5'] ] + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def 
onTrainOneIter(self, generators_samples, generators_list): + source_src_0, source_src_1, source_src_2, = generators_samples[0] + source_dst_0, source_dst_1, source_dst_2, = generators_samples[1] + + feed = [source_src_0, source_src_1, source_src_2, source_dst_0, source_dst_1, source_dst_2] + + loss_GA, = self.GA_train ( feed ) + loss_GB, = self.GB_train ( feed ) + loss_DA, = self.DA_train( feed ) + loss_DB, = self.DB_train( feed ) + + return ( ('GA', loss_GA), ('GB', loss_GB), ('DA', loss_DA), ('DB', loss_DB) ) + + #override + def onGetPreview(self, sample): + test_A0 = sample[0][0] + test_A1 = sample[0][1] + test_A2 = sample[0][2] + + test_B0 = sample[1][0] + test_B1 = sample[1][1] + test_B2 = sample[1][2] + + G_view_result = self.G_view([test_A0, test_A1, test_A2, test_B0, test_B1, test_B2]) + + fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 = [ x[0] / 2 + 0.5 for x in G_view_result] + test_A0, test_A1, test_A2, test_B0, test_B1, test_B2 = [ x[0] / 2 + 0.5 for x in [test_A0, test_A1, test_A2, test_B0, test_B1, test_B2] ] + + r = np.concatenate ((np.concatenate ( (test_A0, test_A1, test_A2, pred_A2, fake_B0, fake_B1, rec_A2), axis=1), + np.concatenate ( (test_B0, test_B1, test_B2, pred_B2, fake_A0, fake_A1, rec_B2), axis=1) + ), axis=0) + + return [ ('RecycleGAN', r ) ] + + def predictor_func (self, face): + x = self.G_convert ( [ face[np.newaxis,...]*2-1 ] )[0] + return np.clip ( x[0] / 2 + 0.5 , 0, 1) + + #override + def get_converter(self, **in_options): + from converters import ConverterImage + return ConverterImage(self.predictor_func, + predictor_input_size=self.options['resolution'], + **in_options) + + @staticmethod + def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=-1)(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=-1)(x) + + XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) + XConv2DTranspose = partial(Conv2DTranspose, padding='same', use_bias=use_bias) + + def func(input): + + + def ResnetBlock(dim, use_dropout=False): + def func(input): + x = input + + x = XConv2D(dim, 3, strides=1)(x) + x = XNormalization(x) + x = ReLU()(x) + + if use_dropout: + x = Dropout(0.5)(x) + + x = XConv2D(dim, 3, strides=1)(x) + x = XNormalization(x) + x = ReLU()(x) + return Add()([x,input]) + return func + + x = input + + x = ReLU()(XNormalization(XConv2D(ngf, 7, strides=1)(x))) + + x = ReLU()(XNormalization(XConv2D(ngf*2, 3, strides=2)(x))) + x = ReLU()(XNormalization(XConv2D(ngf*4, 3, strides=2)(x))) + + for i in range(n_blocks): + x = ResnetBlock(ngf*4, use_dropout=use_dropout)(x) + + x = ReLU()(XNormalization(XConv2DTranspose(ngf*2, 3, strides=2)(x))) + x = ReLU()(XNormalization(XConv2DTranspose(ngf , 3, strides=2)(x))) + + x = XConv2D(output_nc, 7, strides=1, activation='tanh', use_bias=True)(x) + + return x + + return func + + @staticmethod + def UNet(output_nc, use_batch_norm, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalizationL(): + return InstanceNormalization (axis=-1) + else: + use_bias = False + def XNormalizationL(): + return BatchNormalization (axis=-1) + + def XNormalization(x): + return XNormalizationL()(x) + + XConv2D = partial(Conv2D, padding='same', use_bias=use_bias) + XConv2DTranspose = partial(Conv2DTranspose, padding='same', 
use_bias=use_bias) + + def func(input): + + b,h,w,c = K.int_shape(input) + + n_downs = get_power_of_two(w) - 4 + + Norm = XNormalizationL() + Norm2 = XNormalizationL() + Norm4 = XNormalizationL() + Norm8 = XNormalizationL() + + x = input + + x = e1 = XConv2D( ngf, 4, strides=2, use_bias=True ) (x) + + x = e2 = Norm2( XConv2D( ngf*2, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) + x = e3 = Norm4( XConv2D( ngf*4, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) + + l = [] + for i in range(n_downs): + x = Norm8( XConv2D( ngf*8, 4, strides=2 )( LeakyReLU(0.2)(x) ) ) + l += [x] + + x = XConv2D( ngf*8, 4, strides=2, use_bias=True )( LeakyReLU(0.2)(x) ) + + for i in range(n_downs): + x = Norm8( XConv2DTranspose( ngf*8, 4, strides=2 )( ReLU()(x) ) ) + if i <= n_downs-2: + x = Dropout(0.5)(x) + x = Concatenate(axis=-1)([x, l[-i-1] ]) + + x = Norm4( XConv2DTranspose( ngf*4, 4, strides=2 )( ReLU()(x) ) ) + x = Concatenate(axis=-1)([x, e3]) + + x = Norm2( XConv2DTranspose( ngf*2, 4, strides=2 )( ReLU()(x) ) ) + x = Concatenate(axis=-1)([x, e2]) + + x = Norm( XConv2DTranspose( ngf, 4, strides=2 )( ReLU()(x) ) ) + x = Concatenate(axis=-1)([x, e1]) + + x = XConv2DTranspose(output_nc, 4, strides=2, activation='tanh', use_bias=True)( ReLU()(x) ) + + return x + return func + nnlib.UNet = UNet + + @staticmethod + def UNetTemporalPredictor(output_nc, use_batch_norm, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + def func(inputs): + past_2_image_tensor, past_1_image_tensor = inputs + + x = Concatenate(axis=-1)([ past_2_image_tensor, past_1_image_tensor ]) + x = UNet(3, use_batch_norm, ngf=ngf, use_dropout=use_dropout) (x) + + return x + + return func + + @staticmethod + def PatchDiscriminator(ndf=64): + exec (nnlib.import_all(), locals(), globals()) + + #use_bias = True + #def XNormalization(x): + # return InstanceNormalization (axis=-1)(x) + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=-1)(x) + + XConv2D = partial(Conv2D, use_bias=use_bias) + + def func(input): + b,h,w,c = K.int_shape(input) + + x = input + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf, 4, strides=2, padding='valid', use_bias=True)(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*2, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*4, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( ndf*8, 4, strides=2, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# + return func + + @staticmethod + def NLayerDiscriminator(ndf=64, n_layers=3): + exec (nnlib.import_all(), locals(), globals()) + + #use_bias = True + #def XNormalization(x): + # return InstanceNormalization (axis=-1)(x) + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=-1)(x) + + XConv2D = partial(Conv2D, use_bias=use_bias) + + def func(input): + b,h,w,c = K.int_shape(input) + + x = input + + f = ndf + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( f, 4, strides=2, padding='valid', use_bias=True)(x) + f = min( ndf*8, f*2 ) + x = LeakyReLU(0.2)(x) + + for i in range(n_layers): + x = ZeroPadding2D((1,1))(x) + x = XConv2D( f, 4, strides=2, padding='valid')(x) + f 
= min( ndf*8, f*2 ) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = XConv2D( f, 4, strides=1, padding='valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + return XConv2D( 1, 4, strides=1, padding='valid', use_bias=True, activation='sigmoid')(x)# + return func + +Model = RecycleGANModel diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py index 80885f1..f4fd2b6 100644 --- a/models/Model_SAE/Model.py +++ b/models/Model_SAE/Model.py @@ -1,701 +1,701 @@ -from functools import partial -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -#SAE - Styled AutoEncoder -class SAEModel(ModelBase): - - encoderH5 = 'encoder.h5' - inter_BH5 = 'inter_B.h5' - inter_ABH5 = 'inter_AB.h5' - decoderH5 = 'decoder.h5' - decodermH5 = 'decoderm.h5' - - decoder_srcH5 = 'decoder_src.h5' - decoder_srcmH5 = 'decoder_srcm.h5' - decoder_dstH5 = 'decoder_dst.h5' - decoder_dstmH5 = 'decoder_dstm.h5' - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_resolution = 128 - default_archi = 'df' - default_face_type = 'f' - - if is_first_run: - resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") - resolution = np.clip (resolution, 64, 256) - while np.modf(resolution / 16)[0] != 0.0: - resolution -= 1 - self.options['resolution'] = resolution - - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.") - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - self.options['face_type'] = self.options.get('face_type', default_face_type) - self.options['learn_mask'] = self.options.get('learn_mask', True) - - - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) - - if is_first_run: - self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has decreased change to collapse. 
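# A minimal equivalent of the resolution adjustment above, for reference: clamp to the
# supported 64..256 range, then round down to the nearest multiple of 16. The helper
# name is illustrative only and not part of this patch.
def _round_resolution(resolution, lo=64, hi=256, step=16):
    resolution = int(np.clip(resolution, lo, hi))   # keep within the supported range
    return resolution - (resolution % step)         # round down, e.g. 200 -> 192, 300 -> 256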
- else: - self.options['archi'] = self.options.get('archi', default_archi) - - default_ae_dims = 256 if 'liae' in self.options['archi'] else 512 - default_e_ch_dims = 42 - default_d_ch_dims = default_e_ch_dims // 2 - def_ca_weights = False - - if is_first_run: - self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune model size to fit your GPU." ), 32, 1024 ) - self.options['e_ch_dims'] = np.clip ( io.input_int("Encoder dims per channel (21-85 ?:help skip:%d) : " % (default_e_ch_dims) , default_e_ch_dims, help_message="More encoder dims help to recognize more facial features, but require more VRAM. You can fine-tune model size to fit your GPU." ), 21, 85 ) - default_d_ch_dims = self.options['e_ch_dims'] // 2 - self.options['d_ch_dims'] = np.clip ( io.input_int("Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_d_ch_dims) , default_d_ch_dims, help_message="More decoder dims help to get better details, but require more VRAM. You can fine-tune model size to fit your GPU." ), 10, 85 ) - self.options['multiscale_decoder'] = io.input_bool ("Use multiscale decoder? (y/n, ?:help skip:n) : ", False, help_message="Multiscale decoder helps to get better details.") - self.options['ca_weights'] = io.input_bool ("Use CA weights? (y/n, ?:help skip: %s ) : " % (yn_str[def_ca_weights]), def_ca_weights, help_message="Initialize network with 'Convolution Aware' weights. This may help to achieve a higher accuracy model, but consumes a time at first run.") - else: - self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) - self.options['e_ch_dims'] = self.options.get('e_ch_dims', default_e_ch_dims) - self.options['d_ch_dims'] = self.options.get('d_ch_dims', default_d_ch_dims) - self.options['multiscale_decoder'] = self.options.get('multiscale_decoder', False) - self.options['ca_weights'] = self.options.get('ca_weights', def_ca_weights) - - default_face_style_power = 0.0 - default_bg_style_power = 0.0 - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: %s ) : " % (yn_str[def_pixel_loss]), def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time. Enabling this option too early increases the chance of model collapse.") - - default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power) - self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, - help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power) - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 
100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, - help_message="Learn to transfer image around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_apply_random_ct = False if is_first_run else self.options.get('apply_random_ct', False) - self.options['apply_random_ct'] = io.input_bool ("Apply random color transfer to src faceset? (y/n, ?:help skip:%s) : " % (yn_str[default_apply_random_ct]), default_apply_random_ct, help_message="Increase variativity of src samples by apply LCT color transfer from random dst samples. It is like 'face_style' learning, but more precise color transfer and without risk of model collapse, also it does not require additional GPU resources, but the training time may be longer, due to the src faceset is becoming more diverse.") - - if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 - default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) - self.options['clipgrad'] = io.input_bool ("Enable gradient clipping? (y/n, ?:help skip:%s) : " % (yn_str[default_clipgrad]), default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") - else: - self.options['clipgrad'] = False - - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) - self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) - self.options['apply_random_ct'] = self.options.get('apply_random_ct', False) - self.options['clipgrad'] = self.options.get('clipgrad', False) - - if is_first_run: - self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. 
After that, save and run the training again.") - else: - self.options['pretrain'] = False - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - SAEModel.initialize_nn_functions() - self.set_vram_batch_requirements({1.5:4}) - - resolution = self.options['resolution'] - ae_dims = self.options['ae_dims'] - e_ch_dims = self.options['e_ch_dims'] - d_ch_dims = self.options['d_ch_dims'] - self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) - if not self.pretrain: - self.options.pop('pretrain') - - d_residual_blocks = True - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - - self.ms_count = ms_count = 3 if (self.options['multiscale_decoder']) else 1 - - apply_random_ct = self.options.get('apply_random_ct', False) - masked_training = True - - warped_src = Input(bgr_shape) - target_src = Input(bgr_shape) - target_srcm = Input(mask_shape) - - warped_dst = Input(bgr_shape) - target_dst = Input(bgr_shape) - target_dstm = Input(mask_shape) - - target_src_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - target_srcm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - target_dst_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - target_dstm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)] - - common_flow_kwargs = { 'padding': 'zero', - 'norm': '', - 'act':'' } - models_list = [] - weights_to_load = [] - if 'liae' in self.options['archi']: - self.encoder = modelify(SAEModel.LIAEEncFlow(resolution, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape)) - - enc_output_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] - - self.inter_B = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs) - self.inter_AB = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs) - - inter_output_Inputs = [ Input( np.array(K.int_shape(x)[1:])*(1,1,2) ) for x in self.inter_B.outputs ] - - self.decoder = modelify(SAEModel.LIAEDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs)) (inter_output_Inputs) - models_list += [self.encoder, self.inter_B, self.inter_AB, self.decoder] - - if self.options['learn_mask']: - self.decoderm = modelify(SAEModel.LIAEDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs)) (inter_output_Inputs) - models_list += [self.decoderm] - - if not self.is_first_run(): - weights_to_load += [ [self.encoder , 'encoder.h5'], - [self.inter_B , 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5'], - [self.decoder , 'decoder.h5'], - ] - if self.options['learn_mask']: - weights_to_load += [ [self.decoderm, 'decoderm.h5'] ] - - warped_src_code = self.encoder (warped_src) - warped_src_inter_AB_code = self.inter_AB (warped_src_code) - warped_src_inter_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) - - warped_dst_code = self.encoder (warped_dst) - warped_dst_inter_B_code = self.inter_B (warped_dst_code) - warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) - warped_dst_inter_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) - - warped_src_dst_inter_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) - - pred_src_src = 
self.decoder(warped_src_inter_code) - pred_dst_dst = self.decoder(warped_dst_inter_code) - pred_src_dst = self.decoder(warped_src_dst_inter_code) - - if self.options['learn_mask']: - pred_src_srcm = self.decoderm(warped_src_inter_code) - pred_dst_dstm = self.decoderm(warped_dst_inter_code) - pred_src_dstm = self.decoderm(warped_src_dst_inter_code) - - elif 'df' in self.options['archi']: - self.encoder = modelify(SAEModel.DFEncFlow(resolution, ae_dims=ae_dims, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape)) - - dec_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] - - self.decoder_src = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs) - self.decoder_dst = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs) - models_list += [self.encoder, self.decoder_src, self.decoder_dst] - - if self.options['learn_mask']: - self.decoder_srcm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs) - self.decoder_dstm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs) - models_list += [self.decoder_srcm, self.decoder_dstm] - - if not self.is_first_run(): - weights_to_load += [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - if self.options['learn_mask']: - weights_to_load += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'], - ] - - warped_src_code = self.encoder (warped_src) - warped_dst_code = self.encoder (warped_dst) - pred_src_src = self.decoder_src(warped_src_code) - pred_dst_dst = self.decoder_dst(warped_dst_code) - pred_src_dst = self.decoder_src(warped_dst_code) - - if self.options['learn_mask']: - pred_src_srcm = self.decoder_srcm(warped_src_code) - pred_dst_dstm = self.decoder_dstm(warped_dst_code) - pred_src_dstm = self.decoder_srcm(warped_dst_code) - - if self.is_first_run(): - if self.options.get('ca_weights',False): - conv_weights_list = [] - for model in models_list: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - CAInitializerMP ( conv_weights_list ) - else: - self.load_weights_safe(weights_to_load) - - pred_src_src, pred_dst_dst, pred_src_dst, = [ [x] if type(x) != list else x for x in [pred_src_src, pred_dst_dst, pred_src_dst, ] ] - - if self.options['learn_mask']: - pred_src_srcm, pred_dst_dstm, pred_src_dstm = [ [x] if type(x) != list else x for x in [pred_src_srcm, pred_dst_dstm, pred_src_dstm] ] - - target_srcm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_srcm_ar] - target_srcm_sigm_ar = target_srcm_blurred_ar #[ x / 2.0 + 0.5 for x in target_srcm_blurred_ar] - target_srcm_anti_sigm_ar = [ 1.0 - x for x in target_srcm_sigm_ar] - - target_dstm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_dstm_ar] - target_dstm_sigm_ar = target_dstm_blurred_ar#[ x / 2.0 + 0.5 for x in target_dstm_blurred_ar] - target_dstm_anti_sigm_ar = [ 1.0 - x for x in target_dstm_sigm_ar] - - target_src_sigm_ar = target_src_ar#[ x + 1 for x in target_src_ar] - target_dst_sigm_ar = target_dst_ar#[ x + 1 for x in target_dst_ar] - - pred_src_src_sigm_ar = pred_src_src#[ x + 1 for x in pred_src_src] - 
pred_dst_dst_sigm_ar = pred_dst_dst#[ x + 1 for x in pred_dst_dst] - pred_src_dst_sigm_ar = pred_src_dst#[ x + 1 for x in pred_src_dst] - - target_src_masked_ar = [ target_src_sigm_ar[i]*target_srcm_sigm_ar[i] for i in range(len(target_src_sigm_ar))] - target_dst_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] - target_dst_anti_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] - - pred_src_src_masked_ar = [ pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] for i in range(len(pred_src_src_sigm_ar))] - pred_dst_dst_masked_ar = [ pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] for i in range(len(pred_dst_dst_sigm_ar))] - - target_src_masked_ar_opt = target_src_masked_ar if masked_training else target_src_sigm_ar - target_dst_masked_ar_opt = target_dst_masked_ar if masked_training else target_dst_sigm_ar - - pred_src_src_masked_ar_opt = pred_src_src_masked_ar if masked_training else pred_src_src_sigm_ar - pred_dst_dst_masked_ar_opt = pred_dst_dst_masked_ar if masked_training else pred_dst_dst_sigm_ar - - psd_target_dst_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] - psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] - - if self.is_training_mode: - self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - - if 'liae' in self.options['archi']: - src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights - if self.options['learn_mask']: - src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights - else: - src_dst_loss_train_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights - if self.options['learn_mask']: - src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights - - if not self.options['pixel_loss']: - src_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_ar_opt[i], pred_src_src_masked_ar_opt[i]) for i in range(len(target_src_masked_ar_opt)) ]) - else: - src_loss_batch = sum([ K.mean ( 50*K.square( target_src_masked_ar_opt[i] - pred_src_src_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar_opt)) ]) - - src_loss = K.mean(src_loss_batch) - - face_style_power = self.options['face_style_power'] / 100.0 - - if face_style_power != 0: - src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked_ar[-1], target_dst_masked_ar[-1] ) - - bg_style_power = self.options['bg_style_power'] / 100.0 - if bg_style_power != 0: - if not self.options['pixel_loss']: - bg_loss = K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )) - else: - bg_loss = K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - 
target_dst_anti_masked_ar[-1] )) - src_loss += bg_loss - - if not self.options['pixel_loss']: - dst_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_ar_opt[i], pred_dst_dst_masked_ar_opt[i]) for i in range(len(target_dst_masked_ar_opt)) ]) - else: - dst_loss_batch = sum([ K.mean ( 50*K.square( target_dst_masked_ar_opt[i] - pred_dst_dst_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar_opt)) ]) - - dst_loss = K.mean(dst_loss_batch) - - feed = [warped_src, warped_dst] - feed += target_src_ar[::-1] - feed += target_srcm_ar[::-1] - feed += target_dst_ar[::-1] - feed += target_dstm_ar[::-1] - - self.src_dst_train = K.function (feed,[src_loss,dst_loss], self.src_dst_opt.get_updates(src_loss+dst_loss, src_dst_loss_train_weights) ) - - if self.options['learn_mask']: - src_mask_loss = sum([ K.mean(K.square(target_srcm_ar[-1]-pred_src_srcm[-1])) for i in range(len(target_srcm_ar)) ]) - dst_mask_loss = sum([ K.mean(K.square(target_dstm_ar[-1]-pred_dst_dstm[-1])) for i in range(len(target_dstm_ar)) ]) - - feed = [ warped_src, warped_dst] - feed += target_srcm_ar[::-1] - feed += target_dstm_ar[::-1] - - self.src_dst_mask_train = K.function (feed,[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, src_dst_mask_loss_train_weights) ) - - if self.options['learn_mask']: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_dst_dstm[-1], pred_src_dst[-1], pred_src_dstm[-1]]) - else: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_src_dst[-1] ] ) - - - else: - if self.options['learn_mask']: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1], pred_dst_dstm[-1], pred_src_dstm[-1] ]) - else: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1] ]) - - - if self.is_training_mode: - self.src_sample_losses = [] - self.dst_sample_losses = [] - - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE - - training_data_src_path = self.training_data_src_path - training_data_dst_path = self.training_data_dst_path - sort_by_yaw = self.sort_by_yaw - - if self.pretrain and self.pretraining_data_path is not None: - training_data_src_path = self.pretraining_data_path - training_data_dst_path = self.pretraining_data_path - sort_by_yaw = False - - self.set_training_data_generators ([ - SampleGeneratorFace(training_data_src_path, sort_by_yaw_target_samples_path=training_data_dst_path if sort_by_yaw else None, - random_ct_samples_path=training_data_dst_path if apply_random_ct else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution, 'apply_ct': apply_random_ct} ] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i), 'apply_ct': apply_random_ct } for i in range(ms_count)] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)] - ), - - SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, 
), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution} ] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i)} for i in range(ms_count)] + \ - [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)]) - ]) - - #override - def get_model_filename_list(self): - ar = [] - if 'liae' in self.options['archi']: - ar += [[self.encoder, 'encoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.decoder, 'decoder.h5'] - ] - - if not self.pretrain or self.iter == 0: - ar += [ [self.inter_AB, 'inter_AB.h5'], - ] - - if self.options['learn_mask']: - ar += [ [self.decoderm, 'decoderm.h5'] ] - - elif 'df' in self.options['archi']: - if not self.pretrain or self.iter == 0: - ar += [ [self.encoder, 'encoder.h5'], - ] - - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - - if self.options['learn_mask']: - ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'] ] - return ar - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - src_samples = generators_samples[0] - dst_samples = generators_samples[1] - - feed = [src_samples[0], dst_samples[0] ] + \ - src_samples[1:1+self.ms_count*2] + \ - dst_samples[1:1+self.ms_count*2] - - src_loss, dst_loss, = self.src_dst_train (feed) - - if self.options['learn_mask']: - feed = [ src_samples[0], dst_samples[0] ] + \ - src_samples[1+self.ms_count:1+self.ms_count*2] + \ - dst_samples[1+self.ms_count:1+self.ms_count*2] - src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) - - return ( ('src_loss', src_loss), ('dst_loss', dst_loss) ) - - - #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][1+self.ms_count][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][1+self.ms_count][0:4] - - if self.options['learn_mask']: - S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] - else: - S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - - result = [] - st = [] - for i in range(0, len(test_S)): - ar = S[i], SS[i], D[i], DD[i], SD[i] - st.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE', np.concatenate (st, axis=0 )), ] - - if self.options['learn_mask']: - st_m = [] - for i in range(0, len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) - st_m.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE masked', np.concatenate (st_m, axis=0 )), ] - - return result - - def predictor_func (self, face): - if self.options['learn_mask']: - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] - else: - bgr, = self.AE_convert ([face[np.newaxis,...]]) - return bgr[0] - - #override - def get_converter(self): - base_erode_mask_modifier = 30 if self.options['face_type'] == 'f' else 100 - base_blur_mask_modifier = 0 if self.options['face_type'] == 'f' else 100 - - default_erode_mask_modifier = 0 - default_blur_mask_modifier = 100 if (self.options['face_style_power'] or self.options['bg_style_power']) and \ - 
self.options['face_type'] == 'f' else 0 - - face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - from converters import ConverterMasked - return ConverterMasked(self.predictor_func, - predictor_input_size=self.options['resolution'], - predictor_masked=self.options['learn_mask'], - face_type=face_type, - default_mode = 1 if self.options['apply_random_ct'] or self.options['face_style_power'] or self.options['bg_style_power'] else 4, - base_erode_mask_modifier=base_erode_mask_modifier, - base_blur_mask_modifier=base_blur_mask_modifier, - default_erode_mask_modifier=default_erode_mask_modifier, - default_blur_mask_modifier=default_blur_mask_modifier, - clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0) - - @staticmethod - def initialize_nn_functions(): - exec (nnlib.import_all(), locals(), globals()) - - def NormPass(x): - return x - - def Norm(norm=''): - if norm == 'bn': - return BatchNormalization(axis=-1) - else: - return NormPass - - def Act(act='', lrelu_alpha=0.1): - if act == 'prelu': - return PReLU() - else: - return LeakyReLU(alpha=lrelu_alpha) - - class ResidualBlock(object): - def __init__(self, filters, kernel_size=3, padding='zero', norm='', act='', **kwargs): - self.filters = filters - self.kernel_size = kernel_size - self.padding = padding - self.norm = norm - self.act = act - - def __call__(self, inp): - x = inp - x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) - x = Act(self.act, lrelu_alpha=0.2)(x) - x = Norm(self.norm)(x) - x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) - x = Add()([x, inp]) - x = Act(self.act, lrelu_alpha=0.2)(x) - x = Norm(self.norm)(x) - return x - SAEModel.ResidualBlock = ResidualBlock - - def downscale (dim, padding='zero', norm='', act='', **kwargs): - def func(x): - return Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=2, padding=padding)(x)) ) - return func - SAEModel.downscale = downscale - - def upscale (dim, padding='zero', norm='', act='', **kwargs): - def func(x): - return SubpixelUpscaler()(Norm(norm)(Act(act)(Conv2D(dim * 4, kernel_size=3, strides=1, padding=padding)(x)))) - return func - SAEModel.upscale = upscale - - def to_bgr (output_nc, padding='zero', **kwargs): - def func(x): - return Conv2D(output_nc, kernel_size=5, padding=padding, activation='sigmoid')(x) - return func - SAEModel.to_bgr = to_bgr - - @staticmethod - def LIAEEncFlow(resolution, ch_dims, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - downscale = partial(SAEModel.downscale, **kwargs) - - def func(input): - dims = K.int_shape(input)[-1]*ch_dims - - x = input - x = downscale(dims)(x) - x = downscale(dims*2)(x) - x = downscale(dims*4)(x) - x = downscale(dims*8)(x) - - x = Flatten()(x) - return x - return func - - @staticmethod - def LIAEInterFlow(resolution, ae_dims=256, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - lowest_dense_res=resolution // 16 - - def func(input): - x = input[0] - x = Dense(ae_dims)(x) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) - x = upscale(ae_dims*2)(x) - return x - return func - - @staticmethod - def LIAEDecFlow(output_nc,ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - to_bgr = 
partial(SAEModel.to_bgr, **kwargs) - dims = output_nc * ch_dims - ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) - - def func(input): - x = input[0] - - outputs = [] - x1 = upscale(dims*8)( x ) - - if add_residual_blocks: - x1 = ResidualBlock(dims*8)(x1) - x1 = ResidualBlock(dims*8)(x1) - - if multiscale_count >= 3: - outputs += [ to_bgr(output_nc) ( x1 ) ] - - x2 = upscale(dims*4)( x1 ) - - if add_residual_blocks: - x2 = ResidualBlock(dims*4)(x2) - x2 = ResidualBlock(dims*4)(x2) - - if multiscale_count >= 2: - outputs += [ to_bgr(output_nc) ( x2 ) ] - - x3 = upscale(dims*2)( x2 ) - - if add_residual_blocks: - x3 = ResidualBlock( dims*2)(x3) - x3 = ResidualBlock( dims*2)(x3) - - outputs += [ to_bgr(output_nc) ( x3 ) ] - - return outputs - return func - - @staticmethod - def DFEncFlow(resolution, ae_dims, ch_dims, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - downscale = partial(SAEModel.downscale, **kwargs)#, kernel_regularizer=keras.regularizers.l2(0.0), - lowest_dense_res = resolution // 16 - - def func(input): - x = input - - dims = K.int_shape(input)[-1]*ch_dims - x = downscale(dims)(x) - x = downscale(dims*2)(x) - x = downscale(dims*4)(x) - x = downscale(dims*8)(x) - - x = Dense(ae_dims)(Flatten()(x)) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) - x = upscale(ae_dims)(x) - return x - return func - - @staticmethod - def DFDecFlow(output_nc, ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - upscale = partial(SAEModel.upscale, **kwargs) - to_bgr = partial(SAEModel.to_bgr, **kwargs) - dims = output_nc * ch_dims - ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) - - def func(input): - x = input[0] - - outputs = [] - x1 = upscale(dims*8)( x ) - - if add_residual_blocks: - x1 = ResidualBlock( dims*8 )(x1) - x1 = ResidualBlock( dims*8 )(x1) - - if multiscale_count >= 3: - outputs += [ to_bgr(output_nc) ( x1 ) ] - - x2 = upscale(dims*4)( x1 ) - - if add_residual_blocks: - x2 = ResidualBlock( dims*4)(x2) - x2 = ResidualBlock( dims*4)(x2) - - if multiscale_count >= 2: - outputs += [ to_bgr(output_nc) ( x2 ) ] - - x3 = upscale(dims*2)( x2 ) - - if add_residual_blocks: - x3 = ResidualBlock( dims*2)(x3) - x3 = ResidualBlock( dims*2)(x3) - - outputs += [ to_bgr(output_nc) ( x3 ) ] - - return outputs - return func - - +from functools import partial +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samplelib import * +from interact import interact as io + +#SAE - Styled AutoEncoder +class SAEModel(ModelBase): + + encoderH5 = 'encoder.h5' + inter_BH5 = 'inter_B.h5' + inter_ABH5 = 'inter_AB.h5' + decoderH5 = 'decoder.h5' + decodermH5 = 'decoderm.h5' + + decoder_srcH5 = 'decoder_src.h5' + decoder_srcmH5 = 'decoder_srcm.h5' + decoder_dstH5 = 'decoder_dst.h5' + decoder_dstmH5 = 'decoder_dstm.h5' + + #override + def onInitializeOptions(self, is_first_run, ask_override): + yn_str = {True:'y',False:'n'} + + default_resolution = 128 + default_archi = 'df' + default_face_type = 'f' + + if is_first_run: + resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. 
Value will be adjusted to a multiple of 16.")
+            resolution = np.clip (resolution, 64, 256)
+            while np.modf(resolution / 16)[0] != 0.0:
+                resolution -= 1
+            self.options['resolution'] = resolution
+
+            self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of the cheeks.").lower()
+            self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning the mask can help the model to recognize face directions. Training without a mask reduces model size; in that case the converter is forced to use a 'not predicted mask' that is not as smooth as a predicted one. A model with style values can be trained without a mask and produce the same quality result.")
+        else:
+            self.options['resolution'] = self.options.get('resolution', default_resolution)
+            self.options['face_type'] = self.options.get('face_type', default_face_type)
+            self.options['learn_mask'] = self.options.get('learn_mask', True)
+
+
+        if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend:
+            def_optimizer_mode = self.options.get('optimizer_mode', 1)
+            self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train a x2 bigger network by consuming RAM. 3 - allows you to train a x3 bigger network by consuming a huge amount of RAM; slower, and depends on CPU power.")
+        else:
+            self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1)
+
+        if is_first_run:
+            self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has a decreased chance to collapse.
+        else:
+            self.options['archi'] = self.options.get('archi', default_archi)
+
+        default_ae_dims = 256 if 'liae' in self.options['archi'] else 512
+        default_e_ch_dims = 42
+        default_d_ch_dims = default_e_ch_dims // 2
+        def_ca_weights = False
+
+        if is_first_run:
+            self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will be packed into the AE dims. If the amount of AE dims is not enough, then, for example, closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune the model size to fit your GPU." ), 32, 1024 )
+            self.options['e_ch_dims'] = np.clip ( io.input_int("Encoder dims per channel (21-85 ?:help skip:%d) : " % (default_e_ch_dims) , default_e_ch_dims, help_message="More encoder dims help to recognize more facial features, but require more VRAM. You can fine-tune the model size to fit your GPU." ), 21, 85 )
+            default_d_ch_dims = self.options['e_ch_dims'] // 2
+            self.options['d_ch_dims'] = np.clip ( io.input_int("Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_d_ch_dims) , default_d_ch_dims, help_message="More decoder dims help to get better details, but require more VRAM. You can fine-tune the model size to fit your GPU." ), 10, 85 )
+            self.options['multiscale_decoder'] = io.input_bool ("Use multiscale decoder? (y/n, ?:help skip:n) : ", False, help_message="A multiscale decoder helps to get better details.")
+            self.options['ca_weights'] = io.input_bool ("Use CA weights? (y/n, ?:help skip: %s ) : " % (yn_str[def_ca_weights]), def_ca_weights, help_message="Initialize the network with 'Convolution Aware' weights. This may help to achieve a higher accuracy model, but it costs extra time on the first run.")
+        else:
+            self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims)
+            self.options['e_ch_dims'] = self.options.get('e_ch_dims', default_e_ch_dims)
+            self.options['d_ch_dims'] = self.options.get('d_ch_dims', default_d_ch_dims)
+            self.options['multiscale_decoder'] = self.options.get('multiscale_decoder', False)
+            self.options['ca_weights'] = self.options.get('ca_weights', def_ca_weights)
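For intuition about the multiscale decoder option just above: when it is enabled, ms_count becomes 3 further down, the decoder emits predictions at resolution/4, resolution/2 and full resolution, and the per-scale losses are summed. A minimal numpy sketch of that idea (illustrative only; the average-pool downscale helper and plain L2 loss here are stand-ins, not the patch's DSSIM pipeline):

    import numpy as np

    def downscale2x(img):
        # 2x average pooling; a stand-in for the per-scale targets that
        # the sample generators produce at resolution // (2**i)
        h, w, c = img.shape
        return img.reshape(h // 2, 2, w // 2, 2, c).mean(axis=(1, 3))

    def multiscale_l2(target, preds):
        # preds are ordered lowest resolution first, like the decoder outputs
        targets = [target]
        for _ in range(len(preds) - 1):
            targets.insert(0, downscale2x(targets[0]))
        return sum(np.mean(np.square(t - p)) for t, p in zip(targets, preds))

    target = np.random.rand(128, 128, 3).astype(np.float32)
    preds = [np.random.rand(128 // (2 ** i), 128 // (2 ** i), 3).astype(np.float32)
             for i in (2, 1, 0)]
    print(multiscale_l2(target, preds))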
+        default_face_style_power = 0.0
+        default_bg_style_power = 0.0
+        if is_first_run or ask_override:
+            def_pixel_loss = self.options.get('pixel_loss', False)
+            self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: %s ) : " % (yn_str[def_pixel_loss]), def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time. Enabling this option too early increases the chance of model collapse.")
+
+            default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power)
+            self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power,
+                                                                               help_message="Learn to transfer face style details such as light and color conditions. Warning: enable it only after 10k iters, when the predicted face is clear enough to start learning style. Start from a value of 0.1 and check the history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
+
+            default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power)
+            self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power,
+                                                                               help_message="Learn to transfer the image around the face. This can make the face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
+
+            default_apply_random_ct = False if is_first_run else self.options.get('apply_random_ct', False)
+            self.options['apply_random_ct'] = io.input_bool ("Apply random color transfer to src faceset? (y/n, ?:help skip:%s) : " % (yn_str[default_apply_random_ct]), default_apply_random_ct, help_message="Increase the variety of src samples by applying LCT color transfer from random dst samples. It is like 'face_style' learning, but with more precise color transfer and without the risk of model collapse; it also does not require additional GPU resources, but training time may be longer because the src faceset becomes more diverse.")
+
+            if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301
+                default_clipgrad = False if is_first_run else self.options.get('clipgrad', False)
+                self.options['clipgrad'] = io.input_bool ("Enable gradient clipping? (y/n, ?:help skip:%s) : " % (yn_str[default_clipgrad]), default_clipgrad, help_message="Gradient clipping reduces the chance of model collapse at the cost of some training speed.")
+            else:
+                self.options['clipgrad'] = False
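As a usage note on the clipgrad option: it is consumed further down, where src_dst_opt and src_dst_mask_opt pass clipnorm=1.0 to the optimizer. A minimal sketch of the same idea with plain Keras (illustrative; the patch actually uses nnlib's Adam wrapper, which additionally takes an nnlib-specific tf_cpu_mode argument derived from 'optimizer_mode', something standard Keras does not have):

    from keras.optimizers import Adam

    clipgrad = True
    # clipnorm=1.0 rescales any gradient whose L2 norm exceeds 1.0,
    # trading some training speed for stability against collapse
    opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999,
               clipnorm=1.0 if clipgrad else 0.0)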
+        else:
+            self.options['pixel_loss'] = self.options.get('pixel_loss', False)
+            self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power)
+            self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power)
+            self.options['apply_random_ct'] = self.options.get('apply_random_ct', False)
+            self.options['clipgrad'] = self.options.get('clipgrad', False)
+
+        if is_first_run:
+            self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with a large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of the src/dst data. The face will look more morphed. To reduce the morph effect, some model files will be initialized but not updated after pretraining: LIAE: inter_AB.h5, DF: encoder.h5. The longer you pretrain the model, the more morphed the face will look. After that, save and run the training again.")
+        else:
+            self.options['pretrain'] = False
+
+    #override
+    def onInitialize(self):
+        exec(nnlib.import_all(), locals(), globals())
+        SAEModel.initialize_nn_functions()
+        self.set_vram_batch_requirements({1.5:4})
+
+        resolution = self.options['resolution']
+        ae_dims = self.options['ae_dims']
+        e_ch_dims = self.options['e_ch_dims']
+        d_ch_dims = self.options['d_ch_dims']
+        self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False)
+        if not self.pretrain:
+            self.options.pop('pretrain')
+
+        d_residual_blocks = True
+        bgr_shape = (resolution, resolution, 3)
+        mask_shape = (resolution, resolution, 1)
+
+        self.ms_count = ms_count = 3 if (self.options['multiscale_decoder']) else 1
+
+        apply_random_ct = self.options.get('apply_random_ct', False)
+        masked_training = True
+
+        warped_src = Input(bgr_shape)
+        target_src = Input(bgr_shape)
+        target_srcm = Input(mask_shape)
+
+        warped_dst = Input(bgr_shape)
+        target_dst = Input(bgr_shape)
+        target_dstm = Input(mask_shape)
+
+        target_src_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
+        target_srcm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
+        target_dst_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
+        target_dstm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
+
+        common_flow_kwargs = { 'padding': 'zero',
+                               'norm': '',
+                               'act':'' }
+        models_list = []
+        weights_to_load = []
+        if 'liae' in self.options['archi']:
+            self.encoder = modelify(SAEModel.LIAEEncFlow(resolution, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape))
+
+            enc_output_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ]
+
+            self.inter_B = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs)
+            self.inter_AB = modelify(SAEModel.LIAEInterFlow(resolution, ae_dims=ae_dims, **common_flow_kwargs)) (enc_output_Inputs)
+
+            inter_output_Inputs = [ Input( np.array(K.int_shape(x)[1:])*(1,1,2) ) for x in self.inter_B.outputs ]
+
+            self.decoder = modelify(SAEModel.LIAEDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs)) (inter_output_Inputs)
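A note on the LIAE wiring that follows: src codes go through inter_AB twice and are concatenated channel-wise as [AB, AB], dst codes as [B, AB], and the swap prediction feeds dst through [AB, AB]. Since the decoder only ever sees [AB, AB] inputs for src faces during training, decoding dst's [AB, AB] code is what produces the face swap. A small numpy shape sketch of these concatenations (illustrative only; the toy 8x8x4 code shapes are made up):

    import numpy as np

    # toy stand-ins for the spatial outputs of inter_B / inter_AB
    code_B_dst = np.random.rand(1, 8, 8, 4).astype(np.float32)
    code_AB_dst = np.random.rand(1, 8, 8, 4).astype(np.float32)
    code_AB_src = np.random.rand(1, 8, 8, 4).astype(np.float32)

    # channel-wise concatenation, as Concatenate() does below
    src_in = np.concatenate([code_AB_src, code_AB_src], axis=-1)   # train src->src
    dst_in = np.concatenate([code_B_dst, code_AB_dst], axis=-1)    # train dst->dst
    swap_in = np.concatenate([code_AB_dst, code_AB_dst], axis=-1)  # convert dst->src
    print(src_in.shape, dst_in.shape, swap_in.shape)               # all (1, 8, 8, 8)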
+            models_list += [self.encoder, self.inter_B, self.inter_AB, self.decoder]
+
+            if self.options['learn_mask']:
+                self.decoderm = modelify(SAEModel.LIAEDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs)) (inter_output_Inputs)
+                models_list += [self.decoderm]
+
+            if not self.is_first_run():
+                weights_to_load += [ [self.encoder , 'encoder.h5'],
+                                     [self.inter_B , 'inter_B.h5'],
+                                     [self.inter_AB, 'inter_AB.h5'],
+                                     [self.decoder , 'decoder.h5'],
+                                   ]
+                if self.options['learn_mask']:
+                    weights_to_load += [ [self.decoderm, 'decoderm.h5'] ]
+
+            warped_src_code = self.encoder (warped_src)
+            warped_src_inter_AB_code = self.inter_AB (warped_src_code)
+            warped_src_inter_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code])
+
+            warped_dst_code = self.encoder (warped_dst)
+            warped_dst_inter_B_code = self.inter_B (warped_dst_code)
+            warped_dst_inter_AB_code = self.inter_AB (warped_dst_code)
+            warped_dst_inter_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code])
+
+            warped_src_dst_inter_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code])
+
+            pred_src_src = self.decoder(warped_src_inter_code)
+            pred_dst_dst = self.decoder(warped_dst_inter_code)
+            pred_src_dst = self.decoder(warped_src_dst_inter_code)
+
+            if self.options['learn_mask']:
+                pred_src_srcm = self.decoderm(warped_src_inter_code)
+                pred_dst_dstm = self.decoderm(warped_dst_inter_code)
+                pred_src_dstm = self.decoderm(warped_src_dst_inter_code)
+
+        elif 'df' in self.options['archi']:
+            self.encoder = modelify(SAEModel.DFEncFlow(resolution, ae_dims=ae_dims, ch_dims=e_ch_dims, **common_flow_kwargs) ) (Input(bgr_shape))
+
+            dec_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ]
+
+            self.decoder_src = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs)
+            self.decoder_dst = modelify(SAEModel.DFDecFlow (bgr_shape[2],ch_dims=d_ch_dims, multiscale_count=self.ms_count, add_residual_blocks=d_residual_blocks, **common_flow_kwargs )) (dec_Inputs)
+            models_list += [self.encoder, self.decoder_src, self.decoder_dst]
+
+            if self.options['learn_mask']:
+                self.decoder_srcm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs)
+                self.decoder_dstm = modelify(SAEModel.DFDecFlow (mask_shape[2],ch_dims=d_ch_dims, **common_flow_kwargs )) (dec_Inputs)
+                models_list += [self.decoder_srcm, self.decoder_dstm]
+
+            if not self.is_first_run():
+                weights_to_load += [ [self.encoder , 'encoder.h5'],
+                                     [self.decoder_src, 'decoder_src.h5'],
+                                     [self.decoder_dst, 'decoder_dst.h5']
+                                   ]
+                if self.options['learn_mask']:
+                    weights_to_load += [ [self.decoder_srcm, 'decoder_srcm.h5'],
+                                         [self.decoder_dstm, 'decoder_dstm.h5'],
+                                       ]
+
+            warped_src_code = self.encoder (warped_src)
+            warped_dst_code = self.encoder (warped_dst)
+            pred_src_src = self.decoder_src(warped_src_code)
+            pred_dst_dst = self.decoder_dst(warped_dst_code)
+            pred_src_dst = self.decoder_src(warped_dst_code)
+
+            if self.options['learn_mask']:
+                pred_src_srcm = self.decoder_srcm(warped_src_code)
+                pred_dst_dstm = self.decoder_dstm(warped_dst_code)
+                pred_src_dstm = self.decoder_srcm(warped_dst_code)
+
+        if self.is_first_run():
+            if self.options.get('ca_weights',False):
+                conv_weights_list = []
+                for model in models_list:
+                    for layer in model.layers:
+                        if type(layer) == keras.layers.Conv2D:
+                            conv_weights_list += [layer.weights[0]]
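CAInitializerMP just below is nnlib's multiprocessing helper for 'Convolution Aware' initialization. A serial sketch of the same step, assuming CAGenerateWeights as defined in nnlib/CAInitializer.py later in this patch series (illustrative only, not the patch's code):

    import keras.backend as K
    from nnlib.CAInitializer import CAGenerateWeights

    def apply_ca_weights_serial(conv_weights_list):
        # conv_weights_list holds the Conv2D kernel variables collected above
        for kernel in conv_weights_list:
            shape = K.int_shape(kernel)
            value = CAGenerateWeights(shape, K.floatx(), K.image_data_format())
            K.set_value(kernel, value)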
#Conv2D kernel_weights + CAInitializerMP ( conv_weights_list ) + else: + self.load_weights_safe(weights_to_load) + + pred_src_src, pred_dst_dst, pred_src_dst, = [ [x] if type(x) != list else x for x in [pred_src_src, pred_dst_dst, pred_src_dst, ] ] + + if self.options['learn_mask']: + pred_src_srcm, pred_dst_dstm, pred_src_dstm = [ [x] if type(x) != list else x for x in [pred_src_srcm, pred_dst_dstm, pred_src_dstm] ] + + target_srcm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_srcm_ar] + target_srcm_sigm_ar = target_srcm_blurred_ar #[ x / 2.0 + 0.5 for x in target_srcm_blurred_ar] + target_srcm_anti_sigm_ar = [ 1.0 - x for x in target_srcm_sigm_ar] + + target_dstm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_dstm_ar] + target_dstm_sigm_ar = target_dstm_blurred_ar#[ x / 2.0 + 0.5 for x in target_dstm_blurred_ar] + target_dstm_anti_sigm_ar = [ 1.0 - x for x in target_dstm_sigm_ar] + + target_src_sigm_ar = target_src_ar#[ x + 1 for x in target_src_ar] + target_dst_sigm_ar = target_dst_ar#[ x + 1 for x in target_dst_ar] + + pred_src_src_sigm_ar = pred_src_src#[ x + 1 for x in pred_src_src] + pred_dst_dst_sigm_ar = pred_dst_dst#[ x + 1 for x in pred_dst_dst] + pred_src_dst_sigm_ar = pred_src_dst#[ x + 1 for x in pred_src_dst] + + target_src_masked_ar = [ target_src_sigm_ar[i]*target_srcm_sigm_ar[i] for i in range(len(target_src_sigm_ar))] + target_dst_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] + target_dst_anti_masked_ar = [ target_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(target_dst_sigm_ar))] + + pred_src_src_masked_ar = [ pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] for i in range(len(pred_src_src_sigm_ar))] + pred_dst_dst_masked_ar = [ pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] for i in range(len(pred_dst_dst_sigm_ar))] + + target_src_masked_ar_opt = target_src_masked_ar if masked_training else target_src_sigm_ar + target_dst_masked_ar_opt = target_dst_masked_ar if masked_training else target_dst_sigm_ar + + pred_src_src_masked_ar_opt = pred_src_src_masked_ar if masked_training else pred_src_src_sigm_ar + pred_dst_dst_masked_ar_opt = pred_dst_dst_masked_ar if masked_training else pred_dst_dst_sigm_ar + + psd_target_dst_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] + psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] + + if self.is_training_mode: + self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) + self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) + + if 'liae' in self.options['archi']: + src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights + if self.options['learn_mask']: + src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights + else: + src_dst_loss_train_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights + if self.options['learn_mask']: + src_dst_mask_loss_train_weights = self.encoder.trainable_weights + 
self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights + + if not self.options['pixel_loss']: + src_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_ar_opt[i], pred_src_src_masked_ar_opt[i]) for i in range(len(target_src_masked_ar_opt)) ]) + else: + src_loss_batch = sum([ K.mean ( 50*K.square( target_src_masked_ar_opt[i] - pred_src_src_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar_opt)) ]) + + src_loss = K.mean(src_loss_batch) + + face_style_power = self.options['face_style_power'] / 100.0 + + if face_style_power != 0: + src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked_ar[-1], target_dst_masked_ar[-1] ) + + bg_style_power = self.options['bg_style_power'] / 100.0 + if bg_style_power != 0: + if not self.options['pixel_loss']: + bg_loss = K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )) + else: + bg_loss = K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - target_dst_anti_masked_ar[-1] )) + src_loss += bg_loss + + if not self.options['pixel_loss']: + dst_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_ar_opt[i], pred_dst_dst_masked_ar_opt[i]) for i in range(len(target_dst_masked_ar_opt)) ]) + else: + dst_loss_batch = sum([ K.mean ( 50*K.square( target_dst_masked_ar_opt[i] - pred_dst_dst_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar_opt)) ]) + + dst_loss = K.mean(dst_loss_batch) + + feed = [warped_src, warped_dst] + feed += target_src_ar[::-1] + feed += target_srcm_ar[::-1] + feed += target_dst_ar[::-1] + feed += target_dstm_ar[::-1] + + self.src_dst_train = K.function (feed,[src_loss,dst_loss], self.src_dst_opt.get_updates(src_loss+dst_loss, src_dst_loss_train_weights) ) + + if self.options['learn_mask']: + src_mask_loss = sum([ K.mean(K.square(target_srcm_ar[-1]-pred_src_srcm[-1])) for i in range(len(target_srcm_ar)) ]) + dst_mask_loss = sum([ K.mean(K.square(target_dstm_ar[-1]-pred_dst_dstm[-1])) for i in range(len(target_dstm_ar)) ]) + + feed = [ warped_src, warped_dst] + feed += target_srcm_ar[::-1] + feed += target_dstm_ar[::-1] + + self.src_dst_mask_train = K.function (feed,[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, src_dst_mask_loss_train_weights) ) + + if self.options['learn_mask']: + self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_dst_dstm[-1], pred_src_dst[-1], pred_src_dstm[-1]]) + else: + self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_src_dst[-1] ] ) + + + else: + if self.options['learn_mask']: + self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1], pred_dst_dstm[-1], pred_src_dstm[-1] ]) + else: + self.AE_convert = K.function ([warped_dst],[ pred_src_dst[-1] ]) + + + if self.is_training_mode: + self.src_sample_losses = [] + self.dst_sample_losses = [] + + t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF + + t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE + + training_data_src_path = self.training_data_src_path + training_data_dst_path = self.training_data_dst_path + sort_by_yaw = self.sort_by_yaw + + if self.pretrain and self.pretraining_data_path is not None: + 
training_data_src_path = self.pretraining_data_path + training_data_dst_path = self.pretraining_data_path + sort_by_yaw = False + + self.set_training_data_generators ([ + SampleGeneratorFace(training_data_src_path, sort_by_yaw_target_samples_path=training_data_dst_path if sort_by_yaw else None, + random_ct_samples_path=training_data_dst_path if apply_random_ct else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution, 'apply_ct': apply_random_ct} ] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i), 'apply_ct': apply_random_ct } for i in range(ms_count)] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)] + ), + + SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, ), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution} ] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i)} for i in range(ms_count)] + \ + [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)]) + ]) + + #override + def get_model_filename_list(self): + ar = [] + if 'liae' in self.options['archi']: + ar += [[self.encoder, 'encoder.h5'], + [self.inter_B, 'inter_B.h5'], + [self.decoder, 'decoder.h5'] + ] + + if not self.pretrain or self.iter == 0: + ar += [ [self.inter_AB, 'inter_AB.h5'], + ] + + if self.options['learn_mask']: + ar += [ [self.decoderm, 'decoderm.h5'] ] + + elif 'df' in self.options['archi']: + if not self.pretrain or self.iter == 0: + ar += [ [self.encoder, 'encoder.h5'], + ] + + ar += [ [self.decoder_src, 'decoder_src.h5'], + [self.decoder_dst, 'decoder_dst.h5'] + ] + + if self.options['learn_mask']: + ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], + [self.decoder_dstm, 'decoder_dstm.h5'] ] + return ar + + #override + def onSave(self): + self.save_weights_safe( self.get_model_filename_list() ) + + #override + def onTrainOneIter(self, generators_samples, generators_list): + src_samples = generators_samples[0] + dst_samples = generators_samples[1] + + feed = [src_samples[0], dst_samples[0] ] + \ + src_samples[1:1+self.ms_count*2] + \ + dst_samples[1:1+self.ms_count*2] + + src_loss, dst_loss, = self.src_dst_train (feed) + + if self.options['learn_mask']: + feed = [ src_samples[0], dst_samples[0] ] + \ + src_samples[1+self.ms_count:1+self.ms_count*2] + \ + dst_samples[1+self.ms_count:1+self.ms_count*2] + src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) + + return ( ('src_loss', src_loss), ('dst_loss', dst_loss) ) + + + #override + def onGetPreview(self, sample): + test_S = sample[0][1][0:4] #first 4 samples + test_S_m = sample[0][1+self.ms_count][0:4] #first 4 samples + test_D = sample[1][1][0:4] + test_D_m = sample[1][1+self.ms_count][0:4] + + if self.options['learn_mask']: + S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] + DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] + else: + S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + 
self.AE_view ([test_S, test_D]) ) ] + + result = [] + st = [] + for i in range(0, len(test_S)): + ar = S[i], SS[i], D[i], DD[i], SD[i] + st.append ( np.concatenate ( ar, axis=1) ) + + result += [ ('SAE', np.concatenate (st, axis=0 )), ] + + if self.options['learn_mask']: + st_m = [] + for i in range(0, len(test_S)): + ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) + st_m.append ( np.concatenate ( ar, axis=1) ) + + result += [ ('SAE masked', np.concatenate (st_m, axis=0 )), ] + + return result + + def predictor_func (self, face): + if self.options['learn_mask']: + bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) + mask = mask_dst_dstm[0] * mask_src_dstm[0] + return bgr[0], mask[...,0] + else: + bgr, = self.AE_convert ([face[np.newaxis,...]]) + return bgr[0] + + #override + def get_converter(self): + base_erode_mask_modifier = 30 if self.options['face_type'] == 'f' else 100 + base_blur_mask_modifier = 0 if self.options['face_type'] == 'f' else 100 + + default_erode_mask_modifier = 0 + default_blur_mask_modifier = 100 if (self.options['face_style_power'] or self.options['bg_style_power']) and \ + self.options['face_type'] == 'f' else 0 + + face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + from converters import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=self.options['resolution'], + predictor_masked=self.options['learn_mask'], + face_type=face_type, + default_mode = 1 if self.options['apply_random_ct'] or self.options['face_style_power'] or self.options['bg_style_power'] else 4, + base_erode_mask_modifier=base_erode_mask_modifier, + base_blur_mask_modifier=base_blur_mask_modifier, + default_erode_mask_modifier=default_erode_mask_modifier, + default_blur_mask_modifier=default_blur_mask_modifier, + clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0) + + @staticmethod + def initialize_nn_functions(): + exec (nnlib.import_all(), locals(), globals()) + + def NormPass(x): + return x + + def Norm(norm=''): + if norm == 'bn': + return BatchNormalization(axis=-1) + else: + return NormPass + + def Act(act='', lrelu_alpha=0.1): + if act == 'prelu': + return PReLU() + else: + return LeakyReLU(alpha=lrelu_alpha) + + class ResidualBlock(object): + def __init__(self, filters, kernel_size=3, padding='zero', norm='', act='', **kwargs): + self.filters = filters + self.kernel_size = kernel_size + self.padding = padding + self.norm = norm + self.act = act + + def __call__(self, inp): + x = inp + x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) + x = Act(self.act, lrelu_alpha=0.2)(x) + x = Norm(self.norm)(x) + x = Conv2D(self.filters, kernel_size=self.kernel_size, padding=self.padding)(x) + x = Add()([x, inp]) + x = Act(self.act, lrelu_alpha=0.2)(x) + x = Norm(self.norm)(x) + return x + SAEModel.ResidualBlock = ResidualBlock + + def downscale (dim, padding='zero', norm='', act='', **kwargs): + def func(x): + return Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=2, padding=padding)(x)) ) + return func + SAEModel.downscale = downscale + + def upscale (dim, padding='zero', norm='', act='', **kwargs): + def func(x): + return SubpixelUpscaler()(Norm(norm)(Act(act)(Conv2D(dim * 4, kernel_size=3, strides=1, padding=padding)(x)))) + return func + SAEModel.upscale = upscale + + def to_bgr (output_nc, padding='zero', **kwargs): + def func(x): + return Conv2D(output_nc, kernel_size=5, padding=padding, 
activation='sigmoid')(x) + return func + SAEModel.to_bgr = to_bgr + + @staticmethod + def LIAEEncFlow(resolution, ch_dims, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + downscale = partial(SAEModel.downscale, **kwargs) + + def func(input): + dims = K.int_shape(input)[-1]*ch_dims + + x = input + x = downscale(dims)(x) + x = downscale(dims*2)(x) + x = downscale(dims*4)(x) + x = downscale(dims*8)(x) + + x = Flatten()(x) + return x + return func + + @staticmethod + def LIAEInterFlow(resolution, ae_dims=256, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + lowest_dense_res=resolution // 16 + + def func(input): + x = input[0] + x = Dense(ae_dims)(x) + x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) + x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) + x = upscale(ae_dims*2)(x) + return x + return func + + @staticmethod + def LIAEDecFlow(output_nc,ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + to_bgr = partial(SAEModel.to_bgr, **kwargs) + dims = output_nc * ch_dims + ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) + + def func(input): + x = input[0] + + outputs = [] + x1 = upscale(dims*8)( x ) + + if add_residual_blocks: + x1 = ResidualBlock(dims*8)(x1) + x1 = ResidualBlock(dims*8)(x1) + + if multiscale_count >= 3: + outputs += [ to_bgr(output_nc) ( x1 ) ] + + x2 = upscale(dims*4)( x1 ) + + if add_residual_blocks: + x2 = ResidualBlock(dims*4)(x2) + x2 = ResidualBlock(dims*4)(x2) + + if multiscale_count >= 2: + outputs += [ to_bgr(output_nc) ( x2 ) ] + + x3 = upscale(dims*2)( x2 ) + + if add_residual_blocks: + x3 = ResidualBlock( dims*2)(x3) + x3 = ResidualBlock( dims*2)(x3) + + outputs += [ to_bgr(output_nc) ( x3 ) ] + + return outputs + return func + + @staticmethod + def DFEncFlow(resolution, ae_dims, ch_dims, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + downscale = partial(SAEModel.downscale, **kwargs)#, kernel_regularizer=keras.regularizers.l2(0.0), + lowest_dense_res = resolution // 16 + + def func(input): + x = input + + dims = K.int_shape(input)[-1]*ch_dims + x = downscale(dims)(x) + x = downscale(dims*2)(x) + x = downscale(dims*4)(x) + x = downscale(dims*8)(x) + + x = Dense(ae_dims)(Flatten()(x)) + x = Dense(lowest_dense_res * lowest_dense_res * ae_dims)(x) + x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) + x = upscale(ae_dims)(x) + return x + return func + + @staticmethod + def DFDecFlow(output_nc, ch_dims, multiscale_count=1, add_residual_blocks=False, **kwargs): + exec (nnlib.import_all(), locals(), globals()) + upscale = partial(SAEModel.upscale, **kwargs) + to_bgr = partial(SAEModel.to_bgr, **kwargs) + dims = output_nc * ch_dims + ResidualBlock = partial(SAEModel.ResidualBlock, **kwargs) + + def func(input): + x = input[0] + + outputs = [] + x1 = upscale(dims*8)( x ) + + if add_residual_blocks: + x1 = ResidualBlock( dims*8 )(x1) + x1 = ResidualBlock( dims*8 )(x1) + + if multiscale_count >= 3: + outputs += [ to_bgr(output_nc) ( x1 ) ] + + x2 = upscale(dims*4)( x1 ) + + if add_residual_blocks: + x2 = ResidualBlock( dims*4)(x2) + x2 = ResidualBlock( dims*4)(x2) + + if multiscale_count >= 2: + outputs += [ to_bgr(output_nc) ( x2 ) ] + + x3 = upscale(dims*2)( x2 ) + + if add_residual_blocks: + x3 = ResidualBlock( dims*2)(x3) + x3 = 
ResidualBlock( dims*2)(x3) + + outputs += [ to_bgr(output_nc) ( x3 ) ] + + return outputs + return func + + Model = SAEModel \ No newline at end of file diff --git a/models/Model_SAE/__init__.py b/models/Model_SAE/__init__.py index 704b01d..0188f11 100644 --- a/models/Model_SAE/__init__.py +++ b/models/Model_SAE/__init__.py @@ -1 +1 @@ -from .Model import Model +from .Model import Model diff --git a/models/__init__.py b/models/__init__.py index f27088d..971091d 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,5 +1,5 @@ -from .ModelBase import ModelBase - -def import_model(name): - module = __import__('Model_'+name, globals(), locals(), [], 1) - return getattr(module, 'Model') +from .ModelBase import ModelBase + +def import_model(name): + module = __import__('Model_'+name, globals(), locals(), [], 1) + return getattr(module, 'Model') diff --git a/nnlib/CAInitializer.py b/nnlib/CAInitializer.py index 4245334..f81dd06 100644 --- a/nnlib/CAInitializer.py +++ b/nnlib/CAInitializer.py @@ -1,112 +1,112 @@ -import numpy as np - -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - # Arguments - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - # Returns - A tuple of scalars, `(fan_in, fan_out)`. - # Raises - ValueError: in case of invalid `data_format` argument. - """ - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:-2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. 
- fan_in = np.sqrt(np.prod(shape)) - fan_out = np.sqrt(np.prod(shape)) - return fan_in, fan_out - -def _create_basis(filters, size, floatx, eps_std): - if size == 1: - return np.random.normal(0.0, eps_std, (filters, size)) - - nbb = filters // size + 1 - li = [] - for i in range(nbb): - a = np.random.normal(0.0, 1.0, (size, size)) - a = _symmetrize(a) - u, _, v = np.linalg.svd(a) - li.extend(u.T.tolist()) - p = np.array(li[:filters], dtype=floatx) - return p - -def _symmetrize(a): - return a + a.T - np.diag(a.diagonal()) - -def _scale_filters(filters, variance): - c_var = np.var(filters) - p = np.sqrt(variance / c_var) - return filters * p - -def CAGenerateWeights ( shape, floatx, data_format, eps_std=0.05, seed=None ): - if seed is not None: - np.random.seed(seed) - - fan_in, fan_out = _compute_fans(shape, data_format) - variance = 2 / fan_in - - rank = len(shape) - if rank == 3: - row, stack_size, filters_size = shape - - transpose_dimensions = (2, 1, 0) - kernel_shape = (row,) - correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) - correct_fft = np.fft.rfft - - elif rank == 4: - row, column, stack_size, filters_size = shape - - transpose_dimensions = (2, 3, 1, 0) - kernel_shape = (row, column) - correct_ifft = np.fft.irfft2 - correct_fft = np.fft.rfft2 - - elif rank == 5: - x, y, z, stack_size, filters_size = shape - - transpose_dimensions = (3, 4, 0, 1, 2) - kernel_shape = (x, y, z) - correct_fft = np.fft.rfftn - correct_ifft = np.fft.irfftn - else: - raise ValueError('rank unsupported') - - kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape - - init = [] - for i in range(filters_size): - basis = _create_basis( - stack_size, np.prod(kernel_fourier_shape), floatx, eps_std) - basis = basis.reshape((stack_size,) + kernel_fourier_shape) - - filters = [correct_ifft(x, kernel_shape) + - np.random.normal(0, eps_std, kernel_shape) for - x in basis] - - init.append(filters) - - # Format of array is now: filters, stack, row, column - init = np.array(init) - init = _scale_filters(init, variance) - return init.transpose(transpose_dimensions) +import numpy as np + +def _compute_fans(shape, data_format='channels_last'): + """Computes the number of input and output units for a weight shape. + # Arguments + shape: Integer shape tuple. + data_format: Image data format to use for convolution kernels. + Note that all kernels in Keras are standardized on the + `channels_last` ordering (even when inputs are set + to `channels_first`). + # Returns + A tuple of scalars, `(fan_in, fan_out)`. + # Raises + ValueError: in case of invalid `data_format` argument. + """ + if len(shape) == 2: + fan_in = shape[0] + fan_out = shape[1] + elif len(shape) in {3, 4, 5}: + # Assuming convolution kernels (1D, 2D or 3D). + # TH kernel shape: (depth, input_depth, ...) + # TF kernel shape: (..., input_depth, depth) + if data_format == 'channels_first': + receptive_field_size = np.prod(shape[2:]) + fan_in = shape[1] * receptive_field_size + fan_out = shape[0] * receptive_field_size + elif data_format == 'channels_last': + receptive_field_size = np.prod(shape[:-2]) + fan_in = shape[-2] * receptive_field_size + fan_out = shape[-1] * receptive_field_size + else: + raise ValueError('Invalid data_format: ' + data_format) + else: + # No specific assumptions. 
+ fan_in = np.sqrt(np.prod(shape)) + fan_out = np.sqrt(np.prod(shape)) + return fan_in, fan_out + +def _create_basis(filters, size, floatx, eps_std): + if size == 1: + return np.random.normal(0.0, eps_std, (filters, size)) + + nbb = filters // size + 1 + li = [] + for i in range(nbb): + a = np.random.normal(0.0, 1.0, (size, size)) + a = _symmetrize(a) + u, _, v = np.linalg.svd(a) + li.extend(u.T.tolist()) + p = np.array(li[:filters], dtype=floatx) + return p + +def _symmetrize(a): + return a + a.T - np.diag(a.diagonal()) + +def _scale_filters(filters, variance): + c_var = np.var(filters) + p = np.sqrt(variance / c_var) + return filters * p + +def CAGenerateWeights ( shape, floatx, data_format, eps_std=0.05, seed=None ): + if seed is not None: + np.random.seed(seed) + + fan_in, fan_out = _compute_fans(shape, data_format) + variance = 2 / fan_in + + rank = len(shape) + if rank == 3: + row, stack_size, filters_size = shape + + transpose_dimensions = (2, 1, 0) + kernel_shape = (row,) + correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) + correct_fft = np.fft.rfft + + elif rank == 4: + row, column, stack_size, filters_size = shape + + transpose_dimensions = (2, 3, 1, 0) + kernel_shape = (row, column) + correct_ifft = np.fft.irfft2 + correct_fft = np.fft.rfft2 + + elif rank == 5: + x, y, z, stack_size, filters_size = shape + + transpose_dimensions = (3, 4, 0, 1, 2) + kernel_shape = (x, y, z) + correct_fft = np.fft.rfftn + correct_ifft = np.fft.irfftn + else: + raise ValueError('rank unsupported') + + kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape + + init = [] + for i in range(filters_size): + basis = _create_basis( + stack_size, np.prod(kernel_fourier_shape), floatx, eps_std) + basis = basis.reshape((stack_size,) + kernel_fourier_shape) + + filters = [correct_ifft(x, kernel_shape) + + np.random.normal(0, eps_std, kernel_shape) for + x in basis] + + init.append(filters) + + # Format of array is now: filters, stack, row, column + init = np.array(init) + init = _scale_filters(init, variance) + return init.transpose(transpose_dimensions) diff --git a/nnlib/__init__.py b/nnlib/__init__.py index 1579fe5..14793f7 100644 --- a/nnlib/__init__.py +++ b/nnlib/__init__.py @@ -1 +1 @@ -from .nnlib import nnlib +from .nnlib import nnlib diff --git a/nnlib/device.py b/nnlib/device.py index e1ad8d7..144de43 100644 --- a/nnlib/device.py +++ b/nnlib/device.py @@ -1,357 +1,357 @@ -import os -import json -import numpy as np -from .pynvml import * - -#you can set DFL_TF_MIN_REQ_CAP manually for your build -#the reason why we cannot check tensorflow.version is it requires import tensorflow -tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35)) - -class device: - backend = None - class Config(): - force_gpu_idx = -1 - multi_gpu = False - force_gpu_idxs = None - choose_worst_gpu = False - gpu_idxs = [] - gpu_names = [] - gpu_compute_caps = [] - gpu_vram_gb = [] - allow_growth = True - use_fp16 = False - cpu_only = False - backend = None - def __init__ (self, force_gpu_idx = -1, - multi_gpu = False, - force_gpu_idxs = None, - choose_worst_gpu = False, - allow_growth = True, - use_fp16 = False, - cpu_only = False, - **in_options): - - self.backend = device.backend - self.use_fp16 = use_fp16 - self.cpu_only = cpu_only - - if not self.cpu_only: - self.cpu_only = (self.backend == "tensorflow-cpu") - - if not self.cpu_only: - self.force_gpu_idx = force_gpu_idx - self.multi_gpu = multi_gpu - self.force_gpu_idxs = force_gpu_idxs - self.choose_worst_gpu = choose_worst_gpu - 
self.allow_growth = allow_growth - - self.gpu_idxs = [] - - if force_gpu_idxs is not None: - for idx in force_gpu_idxs.split(','): - idx = int(idx) - if device.isValidDeviceIdx(idx): - self.gpu_idxs.append(idx) - else: - gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx() - if gpu_idx != -1: - if self.multi_gpu: - self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx ) - if len(self.gpu_idxs) <= 1: - self.multi_gpu = False - else: - self.gpu_idxs = [gpu_idx] - - self.cpu_only = (len(self.gpu_idxs) == 0) - - - if not self.cpu_only: - self.gpu_names = [] - self.gpu_compute_caps = [] - self.gpu_vram_gb = [] - for gpu_idx in self.gpu_idxs: - self.gpu_names += [device.getDeviceName(gpu_idx)] - self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ] - self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ] - self.cpu_only = (len(self.gpu_idxs) == 0) - else: - self.gpu_names = ['CPU'] - self.gpu_compute_caps = [99] - self.gpu_vram_gb = [0] - - if self.cpu_only: - self.backend = "tensorflow-cpu" - - @staticmethod - def getValidDeviceIdxsEnumerator(): - if device.backend == "plaidML": - for i in range(plaidML_devices_count): - yield i - elif device.backend == "tensorflow": - for gpu_idx in range(nvmlDeviceGetCount()): - cap = device.getDeviceComputeCapability (gpu_idx) - if cap >= tf_min_req_cap: - yield gpu_idx - elif device.backend == "tensorflow-generic": - yield 0 - - - @staticmethod - def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): - result = [] - if device.backend == "plaidML": - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024: - result.append (i) - elif device.backend == "tensorflow": - for i in device.getValidDeviceIdxsEnumerator(): - handle = nvmlDeviceGetHandleByIndex(i) - memInfo = nvmlDeviceGetMemoryInfo( handle ) - if (memInfo.total) >= totalmemsize_gb*1024*1024*1024: - result.append (i) - elif device.backend == "tensorflow-generic": - return [0] - - return result - - @staticmethod - def getAllDevicesIdxsList(): - if device.backend == "plaidML": - return [ *range(plaidML_devices_count) ] - elif device.backend == "tensorflow": - return [ *range(nvmlDeviceGetCount() ) ] - elif device.backend == "tensorflow-generic": - return [0] - - @staticmethod - def getValidDevicesIdxsWithNamesList(): - if device.backend == "plaidML": - return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ] - elif device.backend == "tensorflow": - return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in device.getValidDeviceIdxsEnumerator() ] - elif device.backend == "tensorflow-cpu": - return [ (0, 'CPU') ] - elif device.backend == "tensorflow-generic": - return [ (0, device.getDeviceName(0) ) ] - - @staticmethod - def getDeviceVRAMTotalGb (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024) - elif device.backend == "tensorflow": - if idx < nvmlDeviceGetCount(): - memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) ) - return round ( memInfo.total / (1024*1024*1024) ) - - return 0 - elif device.backend == "tensorflow-generic": - return 2 - - @staticmethod - def getBestValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = 0 - for i in device.getValidDeviceIdxsEnumerator(): - total = 
plaidML_devices[i]['globalMemSize'] - if total > idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = 0 - for i in device.getValidDeviceIdxsEnumerator(): - memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) - if memInfo.total > idx_mem: - idx = i - idx_mem = memInfo.total - - return idx - elif device.backend == "tensorflow-generic": - return 0 - - @staticmethod - def getWorstValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = sys.maxsize - for i in device.getValidDeviceIdxsEnumerator(): - total = plaidML_devices[i]['globalMemSize'] - if total < idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = sys.maxsize - for i in device.getValidDeviceIdxsEnumerator(): - memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) - if memInfo.total < idx_mem: - idx = i - idx_mem = memInfo.total - - return idx - elif device.backend == "tensorflow-generic": - return 0 - - @staticmethod - def isValidDeviceIdx(idx): - if device.backend == "plaidML": - return idx in [*device.getValidDeviceIdxsEnumerator()] - elif device.backend == "tensorflow": - return idx in [*device.getValidDeviceIdxsEnumerator()] - elif device.backend == "tensorflow-generic": - return (idx == 0) - - @staticmethod - def getDeviceIdxsEqualModel(idx): - if device.backend == "plaidML": - result = [] - idx_name = plaidML_devices[idx]['description'] - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['description'] == idx_name: - result.append (i) - - return result - elif device.backend == "tensorflow": - result = [] - idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() - for i in device.getValidDeviceIdxsEnumerator(): - if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name: - result.append (i) - - return result - elif device.backend == "tensorflow-generic": - return [0] if idx == 0 else [] - - @staticmethod - def getDeviceName (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['description'] - elif device.backend == "tensorflow": - if idx < nvmlDeviceGetCount(): - return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() - elif device.backend == "tensorflow-generic": - if idx == 0: - return "Generic GeForce GPU" - - return None - - @staticmethod - def getDeviceID (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['id'].decode() - - return None - - @staticmethod - def getDeviceComputeCapability(idx): - result = 0 - if device.backend == "plaidML": - return 99 - elif device.backend == "tensorflow": - if idx < nvmlDeviceGetCount(): - result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx)) - elif device.backend == "tensorflow-generic": - return 99 if idx == 0 else 0 - - return result[0] * 10 + result[1] - - -force_plaidML = os.environ.get("DFL_FORCE_PLAIDML", "0") == "1" #for OpenCL build , forcing using plaidML even if NVIDIA found -force_tf_cpu = os.environ.get("DFL_FORCE_TF_CPU", "0") == "1" #for OpenCL build , forcing using tf-cpu if plaidML failed -has_nvml = False -has_nvml_cap = False - -#use DFL_FORCE_HAS_NVIDIA_DEVICE=1 if -#- your NVIDIA cannot be seen by OpenCL -#- CUDA build of DFL -has_nvidia_device = os.environ.get("DFL_FORCE_HAS_NVIDIA_DEVICE", "0") == "1" - -plaidML_devices = None -def get_plaidML_devices(): - global plaidML_devices - global has_nvidia_device - if 
plaidML_devices is None: - plaidML_devices = [] - # Using plaidML OpenCL backend to determine system devices and has_nvidia_device - try: - os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #this enables work plaidML without run 'plaidml-setup' - import plaidml - ctx = plaidml.Context() - for d in plaidml.devices(ctx, return_all=True)[0]: - details = json.loads(d.details) - if details['type'] == 'CPU': #skipping opencl-CPU - continue - if 'nvidia' in details['vendor'].lower(): - has_nvidia_device = True - plaidML_devices += [ {'id':d.id, - 'globalMemSize' : int(details['globalMemSize']), - 'description' : d.description.decode() - }] - ctx.shutdown() - except: - pass - return plaidML_devices - -if not has_nvidia_device: - get_plaidML_devices() - -#choosing backend - -if device.backend is None and not force_tf_cpu: - #first trying to load NVSMI and detect CUDA devices for tensorflow backend, - #even force_plaidML is choosed, because if plaidML will fail, we can choose tensorflow - try: - nvmlInit() - has_nvml = True - device.backend = "tensorflow" #set tensorflow backend in order to use device.*device() functions - - gpu_idxs = device.getAllDevicesIdxsList() - gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] ) - - if len ( np.ndarray.flatten ( np.argwhere (gpu_caps >= tf_min_req_cap) ) ) == 0: - if not force_plaidML: - print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." % (tf_min_req_cap // 10, tf_min_req_cap % 10) ) - device.backend = None - nvmlShutdown() - else: - has_nvml_cap = True - except: - #if no NVSMI installed exception will occur - device.backend = None - has_nvml = False - -if force_plaidML or (device.backend is None and not has_nvidia_device): - #tensorflow backend was failed without has_nvidia_device , or forcing plaidML, trying to use plaidML backend - if len(get_plaidML_devices()) == 0: - #print ("plaidML: No capable OpenCL devices found. 
Falling back to tensorflow backend.") - device.backend = None - else: - device.backend = "plaidML" - plaidML_devices_count = len(get_plaidML_devices()) - -if device.backend is None: - if force_tf_cpu: - device.backend = "tensorflow-cpu" - elif not has_nvml: - if has_nvidia_device: - #some notebook systems have NVIDIA card without NVSMI in official drivers - #in that case considering we have system with one capable GPU and let tensorflow to choose best GPU - device.backend = "tensorflow-generic" - else: - #no NVSMI and no NVIDIA cards, also plaidML was failed, then CPU only - device.backend = "tensorflow-cpu" - else: - if has_nvml_cap: - #has NVSMI and capable CUDA-devices, but force_plaidML was failed, then we choosing tensorflow - device.backend = "tensorflow" - else: - #has NVSMI, no capable CUDA-devices, also plaidML was failed, then CPU only - device.backend = "tensorflow-cpu" +import os +import json +import numpy as np +from .pynvml import * + +#you can set DFL_TF_MIN_REQ_CAP manually for your build +#the reason why we cannot check tensorflow.version is it requires import tensorflow +tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35)) + +class device: + backend = None + class Config(): + force_gpu_idx = -1 + multi_gpu = False + force_gpu_idxs = None + choose_worst_gpu = False + gpu_idxs = [] + gpu_names = [] + gpu_compute_caps = [] + gpu_vram_gb = [] + allow_growth = True + use_fp16 = False + cpu_only = False + backend = None + def __init__ (self, force_gpu_idx = -1, + multi_gpu = False, + force_gpu_idxs = None, + choose_worst_gpu = False, + allow_growth = True, + use_fp16 = False, + cpu_only = False, + **in_options): + + self.backend = device.backend + self.use_fp16 = use_fp16 + self.cpu_only = cpu_only + + if not self.cpu_only: + self.cpu_only = (self.backend == "tensorflow-cpu") + + if not self.cpu_only: + self.force_gpu_idx = force_gpu_idx + self.multi_gpu = multi_gpu + self.force_gpu_idxs = force_gpu_idxs + self.choose_worst_gpu = choose_worst_gpu + self.allow_growth = allow_growth + + self.gpu_idxs = [] + + if force_gpu_idxs is not None: + for idx in force_gpu_idxs.split(','): + idx = int(idx) + if device.isValidDeviceIdx(idx): + self.gpu_idxs.append(idx) + else: + gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx() + if gpu_idx != -1: + if self.multi_gpu: + self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx ) + if len(self.gpu_idxs) <= 1: + self.multi_gpu = False + else: + self.gpu_idxs = [gpu_idx] + + self.cpu_only = (len(self.gpu_idxs) == 0) + + + if not self.cpu_only: + self.gpu_names = [] + self.gpu_compute_caps = [] + self.gpu_vram_gb = [] + for gpu_idx in self.gpu_idxs: + self.gpu_names += [device.getDeviceName(gpu_idx)] + self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ] + self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ] + self.cpu_only = (len(self.gpu_idxs) == 0) + else: + self.gpu_names = ['CPU'] + self.gpu_compute_caps = [99] + self.gpu_vram_gb = [0] + + if self.cpu_only: + self.backend = "tensorflow-cpu" + + @staticmethod + def getValidDeviceIdxsEnumerator(): + if device.backend == "plaidML": + for i in range(plaidML_devices_count): + yield i + elif device.backend == "tensorflow": + for gpu_idx in range(nvmlDeviceGetCount()): + cap = device.getDeviceComputeCapability (gpu_idx) + if cap >= tf_min_req_cap: + yield gpu_idx + elif device.backend == "tensorflow-generic": + yield 0 + + + 
@staticmethod + def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): + result = [] + if device.backend == "plaidML": + for i in device.getValidDeviceIdxsEnumerator(): + if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024: + result.append (i) + elif device.backend == "tensorflow": + for i in device.getValidDeviceIdxsEnumerator(): + handle = nvmlDeviceGetHandleByIndex(i) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + if (memInfo.total) >= totalmemsize_gb*1024*1024*1024: + result.append (i) + elif device.backend == "tensorflow-generic": + return [0] + + return result + + @staticmethod + def getAllDevicesIdxsList(): + if device.backend == "plaidML": + return [ *range(plaidML_devices_count) ] + elif device.backend == "tensorflow": + return [ *range(nvmlDeviceGetCount() ) ] + elif device.backend == "tensorflow-generic": + return [0] + + @staticmethod + def getValidDevicesIdxsWithNamesList(): + if device.backend == "plaidML": + return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ] + elif device.backend == "tensorflow": + return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in device.getValidDeviceIdxsEnumerator() ] + elif device.backend == "tensorflow-cpu": + return [ (0, 'CPU') ] + elif device.backend == "tensorflow-generic": + return [ (0, device.getDeviceName(0) ) ] + + @staticmethod + def getDeviceVRAMTotalGb (idx): + if device.backend == "plaidML": + if idx < plaidML_devices_count: + return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024) + elif device.backend == "tensorflow": + if idx < nvmlDeviceGetCount(): + memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) ) + return round ( memInfo.total / (1024*1024*1024) ) + + return 0 + elif device.backend == "tensorflow-generic": + return 2 + + @staticmethod + def getBestValidDeviceIdx(): + if device.backend == "plaidML": + idx = -1 + idx_mem = 0 + for i in device.getValidDeviceIdxsEnumerator(): + total = plaidML_devices[i]['globalMemSize'] + if total > idx_mem: + idx = i + idx_mem = total + + return idx + elif device.backend == "tensorflow": + idx = -1 + idx_mem = 0 + for i in device.getValidDeviceIdxsEnumerator(): + memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) + if memInfo.total > idx_mem: + idx = i + idx_mem = memInfo.total + + return idx + elif device.backend == "tensorflow-generic": + return 0 + + @staticmethod + def getWorstValidDeviceIdx(): + if device.backend == "plaidML": + idx = -1 + idx_mem = sys.maxsize + for i in device.getValidDeviceIdxsEnumerator(): + total = plaidML_devices[i]['globalMemSize'] + if total < idx_mem: + idx = i + idx_mem = total + + return idx + elif device.backend == "tensorflow": + idx = -1 + idx_mem = sys.maxsize + for i in device.getValidDeviceIdxsEnumerator(): + memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) ) + if memInfo.total < idx_mem: + idx = i + idx_mem = memInfo.total + + return idx + elif device.backend == "tensorflow-generic": + return 0 + + @staticmethod + def isValidDeviceIdx(idx): + if device.backend == "plaidML": + return idx in [*device.getValidDeviceIdxsEnumerator()] + elif device.backend == "tensorflow": + return idx in [*device.getValidDeviceIdxsEnumerator()] + elif device.backend == "tensorflow-generic": + return (idx == 0) + + @staticmethod + def getDeviceIdxsEqualModel(idx): + if device.backend == "plaidML": + result = [] + idx_name = plaidML_devices[idx]['description'] + for i in device.getValidDeviceIdxsEnumerator(): + if 
plaidML_devices[i]['description'] == idx_name:
+                    result.append (i)
+
+            return result
+        elif device.backend == "tensorflow":
+            result = []
+            idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
+            for i in device.getValidDeviceIdxsEnumerator():
+                if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name:
+                    result.append (i)
+
+            return result
+        elif device.backend == "tensorflow-generic":
+            return [0] if idx == 0 else []
+
+    @staticmethod
+    def getDeviceName (idx):
+        if device.backend == "plaidML":
+            if idx < plaidML_devices_count:
+                return plaidML_devices[idx]['description']
+        elif device.backend == "tensorflow":
+            if idx < nvmlDeviceGetCount():
+                return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
+        elif device.backend == "tensorflow-generic":
+            if idx == 0:
+                return "Generic GeForce GPU"
+
+        return None
+
+    @staticmethod
+    def getDeviceID (idx):
+        if device.backend == "plaidML":
+            if idx < plaidML_devices_count:
+                return plaidML_devices[idx]['id'].decode()
+
+        return None
+
+    @staticmethod
+    def getDeviceComputeCapability(idx):
+        result = 0
+        if device.backend == "plaidML":
+            return 99
+        elif device.backend == "tensorflow":
+            if idx < nvmlDeviceGetCount():
+                result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
+        elif device.backend == "tensorflow-generic":
+            return 99 if idx == 0 else 0
+
+        return result[0] * 10 + result[1]
+
+
+force_plaidML = os.environ.get("DFL_FORCE_PLAIDML", "0") == "1" #for the OpenCL build: force plaidML even if an NVIDIA device is found
+force_tf_cpu = os.environ.get("DFL_FORCE_TF_CPU", "0") == "1" #for the OpenCL build: force tf-cpu if plaidML failed
+has_nvml = False
+has_nvml_cap = False
+
+#use DFL_FORCE_HAS_NVIDIA_DEVICE=1 if:
+#- your NVIDIA device cannot be seen by OpenCL
+#- this is the CUDA build of DFL
+has_nvidia_device = os.environ.get("DFL_FORCE_HAS_NVIDIA_DEVICE", "0") == "1"
+
+plaidML_devices = None
+def get_plaidML_devices():
+    global plaidML_devices
+    global has_nvidia_device
+    if plaidML_devices is None:
+        plaidML_devices = []
+        # Use the plaidML OpenCL backend to determine system devices and has_nvidia_device
+        try:
+            os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #this lets plaidML work without running 'plaidml-setup'
+            import plaidml
+            ctx = plaidml.Context()
+            for d in plaidml.devices(ctx, return_all=True)[0]:
+                details = json.loads(d.details)
+                if details['type'] == 'CPU': #skip OpenCL CPU devices
+                    continue
+                if 'nvidia' in details['vendor'].lower():
+                    has_nvidia_device = True
+                plaidML_devices += [ {'id':d.id,
+                                      'globalMemSize' : int(details['globalMemSize']),
+                                      'description' : d.description.decode()
+                                      }]
+            ctx.shutdown()
+        except:
+            pass
+    return plaidML_devices
+
+if not has_nvidia_device:
+    get_plaidML_devices()
+
+#choose the backend
+
+if device.backend is None and not force_tf_cpu:
+    #first try to load NVSMI and detect CUDA devices for the tensorflow backend,
+    #even if force_plaidML is chosen, so that we can fall back to tensorflow if plaidML fails
+    try:
+        nvmlInit()
+        has_nvml = True
+        device.backend = "tensorflow" #set the tensorflow backend in order to use the device.*device() functions
+
+        gpu_idxs = device.getAllDevicesIdxsList()
+        gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] )
+
+        if len ( np.ndarray.flatten ( np.argwhere (gpu_caps >= tf_min_req_cap) ) ) == 0:
+            if not force_plaidML:
+                print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." % (tf_min_req_cap // 10, tf_min_req_cap % 10) )
+            device.backend = None
+            nvmlShutdown()
+        else:
+            has_nvml_cap = True
+    except:
+        #an exception occurs here if NVSMI is not installed
+        device.backend = None
+        has_nvml = False
+
+if force_plaidML or (device.backend is None and not has_nvidia_device):
+    #the tensorflow backend failed and no NVIDIA device was found, or plaidML is forced; try the plaidML backend
+    if len(get_plaidML_devices()) == 0:
+        #print ("plaidML: No capable OpenCL devices found. Falling back to tensorflow backend.")
+        device.backend = None
+    else:
+        device.backend = "plaidML"
+        plaidML_devices_count = len(get_plaidML_devices())
+
+if device.backend is None:
+    if force_tf_cpu:
+        device.backend = "tensorflow-cpu"
+    elif not has_nvml:
+        if has_nvidia_device:
+            #some notebook systems have an NVIDIA card without NVSMI in the official drivers;
+            #in that case assume one capable GPU and let tensorflow choose the best device
+            device.backend = "tensorflow-generic"
+        else:
+            #no NVSMI and no NVIDIA cards, and plaidML also failed, so CPU only
+            device.backend = "tensorflow-cpu"
+    else:
+        if has_nvml_cap:
+            #NVSMI and capable CUDA devices are present, but force_plaidML failed, so choose tensorflow
+            device.backend = "tensorflow"
+        else:
+            #NVSMI is present but there are no capable CUDA devices, and plaidML also failed, so CPU only
+            device.backend = "tensorflow-cpu"
diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py
index 62a39c9..edcb201 100644
--- a/nnlib/nnlib.py
+++ b/nnlib/nnlib.py
@@ -1,1048 +1,1048 @@
-import os
-import sys
-import contextlib
-import numpy as np
-
-from .CAInitializer import CAGenerateWeights
-import multiprocessing
-from joblib import Subprocessor
-
-from utils import std_utils
-from .device import device
-from interact import interact as io
-
-class nnlib(object):
-    device = device #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
-    DeviceConfig = device.Config
-    active_DeviceConfig = DeviceConfig() #default is one best GPU
-
-    backend = ""
-
-    dlib = None
-
-    keras = None
-    keras_contrib = None
-
-    tf = None
-    tf_sess = None
-
-    PML = None
-    PMLK = None
-    PMLTile= None
-
-    code_import_keras = None
-    code_import_keras_contrib = None
-    code_import_all = None
-
-    code_import_dlib = None
-
-
-    ResNet = None
-    UNet = None
-    UNetTemporalPredictor = None
-    NLayerDiscriminator = None
-
-    code_import_keras_string = \
-"""
-keras = nnlib.keras
-K = keras.backend
-KL = keras.layers
-
-Input = KL.Input
-
-Dense = KL.Dense
-Conv2D = nnlib.Conv2D
-Conv2DTranspose = nnlib.Conv2DTranspose
-SeparableConv2D = KL.SeparableConv2D
-MaxPooling2D = KL.MaxPooling2D
-UpSampling2D = KL.UpSampling2D
-BatchNormalization = KL.BatchNormalization
-
-LeakyReLU = KL.LeakyReLU
-ReLU = KL.ReLU
-PReLU = KL.PReLU
-tanh = KL.Activation('tanh')
-sigmoid = KL.Activation('sigmoid')
-Dropout = KL.Dropout
-Softmax = KL.Softmax
-
-Lambda = KL.Lambda
-Add = KL.Add
-Concatenate = KL.Concatenate
-
-
-Flatten = KL.Flatten
-Reshape = KL.Reshape
-
-ZeroPadding2D = KL.ZeroPadding2D
-
-RandomNormal = keras.initializers.RandomNormal
-Model = keras.models.Model
-
-Adam = nnlib.Adam
-
-modelify = nnlib.modelify
-gaussian_blur = nnlib.gaussian_blur
-style_loss = nnlib.style_loss
-dssim = nnlib.dssim
-
-PixelShuffler = nnlib.PixelShuffler
-SubpixelUpscaler = nnlib.SubpixelUpscaler
-Scale = nnlib.Scale
-
-CAInitializerMP = nnlib.CAInitializerMP
-
-#ReflectionPadding2D = nnlib.ReflectionPadding2D
-#AddUniformNoise = nnlib.AddUniformNoise
-"""
-    code_import_keras_contrib_string = \
-"""
-keras_contrib
= nnlib.keras_contrib -GroupNormalization = keras_contrib.layers.GroupNormalization -InstanceNormalization = keras_contrib.layers.InstanceNormalization -""" - code_import_dlib_string = \ -""" -dlib = nnlib.dlib -""" - - code_import_all_string = \ -""" -DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss -ResNet = nnlib.ResNet -UNet = nnlib.UNet -UNetTemporalPredictor = nnlib.UNetTemporalPredictor -NLayerDiscriminator = nnlib.NLayerDiscriminator -""" - - - @staticmethod - def _import_tf(device_config): - if nnlib.tf is not None: - return nnlib.code_import_tf - - if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': - suppressor = std_utils.suppress_stdout_stderr().__enter__() - else: - suppressor = None - - if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): - os.environ.pop('CUDA_VISIBLE_DEVICES') - - os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #tf log errors only - import tensorflow as tf - nnlib.tf = tf - - if device_config.cpu_only: - config = tf.ConfigProto(device_count={'GPU': 0}) - else: - config = tf.ConfigProto() - - if device_config.backend != "tensorflow-generic": - #tensorflow-generic is system with NVIDIA card, but w/o NVSMI - #so dont hide devices and let tensorflow to choose best card - visible_device_list = '' - for idx in device_config.gpu_idxs: - visible_device_list += str(idx) + ',' - config.gpu_options.visible_device_list=visible_device_list[:-1] - - config.gpu_options.force_gpu_compatible = True - config.gpu_options.allow_growth = device_config.allow_growth - - nnlib.tf_sess = tf.Session(config=config) - - if suppressor is not None: - suppressor.__exit__() - - @staticmethod - def import_keras(device_config): - if nnlib.keras is not None: - return nnlib.code_import_keras - - nnlib.backend = device_config.backend - if "tensorflow" in nnlib.backend: - nnlib._import_tf(device_config) - elif nnlib.backend == "plaidML": - os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" - os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] ) - - #if "tensorflow" in nnlib.backend: - # nnlib.keras = nnlib.tf.keras - #else: - import keras as keras_ - nnlib.keras = keras_ - - if 'KERAS_BACKEND' in os.environ: - os.environ.pop('KERAS_BACKEND') - - if nnlib.backend == "plaidML": - import plaidml - import plaidml.tile - nnlib.PML = plaidml - nnlib.PMLK = plaidml.keras.backend - nnlib.PMLTile = plaidml.tile - - if device_config.use_fp16: - nnlib.keras.backend.set_floatx('float16') - - if "tensorflow" in nnlib.backend: - nnlib.keras.backend.set_session(nnlib.tf_sess) - - nnlib.keras.backend.set_image_data_format('channels_last') - - nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec') - nnlib.__initialize_keras_functions() - - return nnlib.code_import_keras - - @staticmethod - def __initialize_keras_functions(): - keras = nnlib.keras - K = keras.backend - KL = keras.layers - backend = nnlib.backend - - def modelify(model_functor): - def func(tensor): - return keras.models.Model (tensor, model_functor(tensor)) - return func - - nnlib.modelify = modelify - - def gaussian_blur(radius=2.0): - def gaussian(x, mu, sigma): - return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) - - def make_kernel(sigma): - kernel_size = max(3, int(2 * 2 * sigma + 1)) - mean = np.floor(0.5 * kernel_size) - kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) - np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) - kernel = 
np_kernel / np.sum(np_kernel) - return kernel - - gauss_kernel = make_kernel(radius) - gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] - - def func(input): - inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ] - - outputs = [] - for i in range(len(inputs)): - outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ] - - return K.concatenate (outputs, axis=-1) - return func - nnlib.gaussian_blur = gaussian_blur - - def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1): - if gaussian_blur_radius > 0.0: - gblur = gaussian_blur(gaussian_blur_radius) - - def sd(content, style, loss_weight): - content_nc = K.int_shape(content)[-1] - style_nc = K.int_shape(style)[-1] - if content_nc != style_nc: - raise Exception("style_loss() content_nc != style_nc") - - axes = [1,2] - c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True) - s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True) - c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5) - - mean_loss = K.sum(K.square(c_mean-s_mean)) - std_loss = K.sum(K.square(c_std-s_std)) - - return (mean_loss + std_loss) * ( loss_weight / float(content_nc) ) - - def func(target, style): - if wnd_size == 0: - if gaussian_blur_radius > 0.0: - return sd( gblur(target), gblur(style), loss_weight=loss_weight) - else: - return sd( target, style, loss_weight=loss_weight ) - else: - #currently unused - if nnlib.tf is not None: - sh = K.int_shape(target)[1] - k = (sh-wnd_size) // step_size + 1 - if gaussian_blur_radius > 0.0: - target, style = gblur(target), gblur(style) - target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - return sd( target, style, loss_weight ) - if nnlib.PML is not None: - print ("Sorry, plaidML backend does not support style_loss") - return 0 - return func - nnlib.style_loss = style_loss - - def dssim(kernel_size=11, k1=0.01, k2=0.03, max_value=1.0): - # port of tf.image.ssim to pure keras in order to work on plaidML backend. - - def func(y_true, y_pred): - ch = K.shape(y_pred)[-1] - - def _fspecial_gauss(size, sigma): - #Function to mimic the 'fspecial' gaussian MATLAB function. 
- coords = np.arange(0, size, dtype=K.floatx()) - coords -= (size - 1 ) / 2.0 - g = coords**2 - g *= ( -0.5 / (sigma**2) ) - g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) - g = K.constant ( np.reshape (g, (1,-1)) ) - g = K.softmax(g) - g = K.reshape (g, (size, size, 1, 1)) - g = K.tile (g, (1,1,ch,1)) - return g - - kernel = _fspecial_gauss(kernel_size,1.5) - - def reducer(x): - return K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid') - - c1 = (k1 * max_value) ** 2 - c2 = (k2 * max_value) ** 2 - - mean0 = reducer(y_true) - mean1 = reducer(y_pred) - num0 = mean0 * mean1 * 2.0 - den0 = K.square(mean0) + K.square(mean1) - luminance = (num0 + c1) / (den0 + c1) - - num1 = reducer(y_true * y_pred) * 2.0 - den1 = reducer(K.square(y_true) + K.square(y_pred)) - c2 *= 1.0 #compensation factor - cs = (num1 - num0 + c2) / (den1 - den0 + c2) - - ssim_val = K.mean(luminance * cs, axis=(-3, -2) ) - return(1.0 - ssim_val ) / 2.0 - - return func - - nnlib.dssim = dssim - - if 'tensorflow' in backend: - class PixelShuffler(keras.layers.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - return K.tf.depth_to_space(inputs, self.size[0], 'NCHW') - - elif self.data_format == 'channels_last': - return K.tf.depth_to_space(inputs, self.size[0], 'NHWC') - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - else: - class PixelShuffler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - batch_size, c, h, w = input_shape[0], K.int_shape(inputs)[1], input_shape[2], input_shape[3] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc 
= c // (rh * rw) - - out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) - out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) - out = K.reshape(out, (batch_size, oc, oh, ow)) - return out - - elif self.data_format == 'channels_last': - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) - out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = K.reshape(out, (batch_size, oh, ow, oc)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - nnlib.PixelShuffler = PixelShuffler - nnlib.SubpixelUpscaler = PixelShuffler - - class Scale(KL.Layer): - """ - GAN Custom Scal Layer - Code borrows from https://github.com/flyyufelix/cnn_finetune - """ - def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs): - self.axis = axis - self.gamma_init = keras.initializers.get(gamma_init) - self.initial_weights = weights - super(Scale, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [keras.engine.InputSpec(shape=input_shape)] - - # Compatibility with TensorFlow >= 1.0.0 - self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name)) - self.trainable_weights = [self.gamma] - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - def call(self, x, mask=None): - return self.gamma * x - - def get_config(self): - config = {"axis": self.axis} - base_config = super(Scale, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Scale = Scale - - class Adam(keras.optimizers.Optimizer): - """Adam optimizer. - - Default parameters follow those provided in the original paper. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this - algorithm from the paper "On the Convergence of Adam and - Beyond". 
- tf_cpu_mode: only for tensorflow backend - 0 - default, no changes. - 1 - allows to train x2 bigger network on same VRAM consuming RAM - 2 - allows to train x3 bigger network on same VRAM consuming RAM*2 and CPU power. - - # References - - [Adam - A Method for Stochastic Optimization] - (https://arxiv.org/abs/1412.6980v8) - - [On the Convergence of Adam and Beyond] - (https://openreview.net/forum?id=ryQu7f-RZ) - """ - - def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, - epsilon=None, decay=0., amsgrad=False, tf_cpu_mode=0, **kwargs): - super(Adam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - self.amsgrad = amsgrad - self.tf_cpu_mode = tf_cpu_mode - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - if e: e.__exit__(None, None, None) - - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - self.updates.append(K.update(vhat, vhat_t)) - if e: e.__exit__(None, None, None) - - if self.amsgrad: - p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - else: - p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p_t - - # Apply constraints. 
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad} - base_config = super(Adam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Adam = Adam - - def CAInitializerMP( conv_weights_list ): - #Convolution Aware Initialization https://arxiv.org/abs/1702.06295 - result = CAInitializerMPSubprocessor ( [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ], K.floatx(), K.image_data_format() ).run() - for idx, weights in result: - K.set_value ( conv_weights_list[idx], weights ) - nnlib.CAInitializerMP = CAInitializerMP - - - if backend == "plaidML": - class TileOP_ReflectionPadding2D(nnlib.PMLTile.Operation): - def __init__(self, input, w_pad, h_pad): - if K.image_data_format() == 'channels_last': - if input.shape.ndims == 4: - H, W = input.shape.dims[1:3] - if (type(H) == int and h_pad >= H) or \ - (type(W) == int and w_pad >= W): - raise ValueError("Paddings must be less than dimensions.") - - c = """ function (I[B, H, W, C] ) -> (O) {{ - WE = W + {w_pad}*2; - HE = H + {h_pad}*2; - """.format(h_pad=h_pad, w_pad=w_pad) - if w_pad > 0: - c += """ - LEFT_PAD [b, h, w , c : B, H, WE, C ] = =(I[b, h, {w_pad}-w, c]), w < {w_pad} ; - HCENTER [b, h, w , c : B, H, WE, C ] = =(I[b, h, w-{w_pad}, c]), w < W+{w_pad}-1 ; - RIGHT_PAD[b, h, w , c : B, H, WE, C ] = =(I[b, h, 2*W - (w-{w_pad}) -2, c]); - LCR = LEFT_PAD+HCENTER+RIGHT_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += "LCR = I;" - - if h_pad > 0: - c += """ - TOP_PAD [b, h, w , c : B, HE, WE, C ] = =(LCR[b, {h_pad}-h, w, c]), h < {h_pad}; - VCENTER [b, h, w , c : B, HE, WE, C ] = =(LCR[b, h-{h_pad}, w, c]), h < H+{h_pad}-1 ; - BOTTOM_PAD[b, h, w , c : B, HE, WE, C ] = =(LCR[b, 2*H - (h-{h_pad}) -2, w, c]); - TVB = TOP_PAD+VCENTER+BOTTOM_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += "TVB = LCR;" - - c += "O = TVB; }" - - inp_dims = input.shape.dims - out_dims = (inp_dims[0], inp_dims[1]+h_pad*2, inp_dims[2]+w_pad*2, inp_dims[3]) - else: - raise NotImplemented - else: - raise NotImplemented - - super(TileOP_ReflectionPadding2D, self).__init__(c, [('I', input) ], - [('O', nnlib.PMLTile.Shape(input.shape.dtype, out_dims ) )]) - - class ReflectionPadding2D(keras.layers.Layer): - def __init__(self, padding=(1, 1), **kwargs): - self.padding = tuple(padding) - self.input_spec = [keras.layers.InputSpec(ndim=4)] - super(ReflectionPadding2D, self).__init__(**kwargs) - - def compute_output_shape(self, s): - """ If you are using "channels_last" configuration""" - return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3]) - - def call(self, x, mask=None): - w_pad,h_pad = self.padding - if "tensorflow" in backend: - return K.tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT') - elif backend == "plaidML": - return TileOP_ReflectionPadding2D.function(x, self.padding[0], self.padding[1]) - else: - if K.image_data_format() == 'channels_last': - if x.shape.ndims == 4: - w = K.concatenate ([ x[:,:,w_pad:0:-1,:], - x, - x[:,:,-2:-w_pad-2:-1,:] ], axis=2 ) - h = K.concatenate ([ w[:,h_pad:0:-1,:,:], - w, - w[:,-2:-h_pad-2:-1,:,:] ], axis=1 ) - return h - else: - raise NotImplemented - else: - raise 
NotImplemented - - nnlib.ReflectionPadding2D = ReflectionPadding2D - - class Conv2D(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2D (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2D = Conv2D - - class Conv2DTranspose(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2DTranspose (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2DTranspose = Conv2DTranspose - - @staticmethod - def import_keras_contrib(device_config): - if nnlib.keras_contrib is not None: - return nnlib.code_import_keras_contrib - - import keras_contrib as keras_contrib_ - nnlib.keras_contrib = keras_contrib_ - nnlib.__initialize_keras_contrib_functions() - nnlib.code_import_keras_contrib = compile (nnlib.code_import_keras_contrib_string,'','exec') - - @staticmethod - def __initialize_keras_contrib_functions(): - pass - - @staticmethod - def import_dlib( device_config = None): - if nnlib.dlib is not None: - return nnlib.code_import_dlib - - import dlib as dlib_ - nnlib.dlib = dlib_ - if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0: - nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0]) - - nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec') - - @staticmethod - def import_all(device_config = None): - if nnlib.code_import_all is None: - if device_config is None: - device_config = nnlib.active_DeviceConfig - else: - nnlib.active_DeviceConfig = device_config - - nnlib.import_keras(device_config) - nnlib.import_keras_contrib(device_config) - nnlib.code_import_all = compile (nnlib.code_import_keras_string + '\n' - + nnlib.code_import_keras_contrib_string - + nnlib.code_import_all_string,'','exec') - nnlib.__initialize_all_functions() - - return nnlib.code_import_all - - @staticmethod - def __initialize_all_functions(): - exec (nnlib.import_keras(nnlib.active_DeviceConfig), locals(), globals()) - exec (nnlib.import_keras_contrib(nnlib.active_DeviceConfig), locals(), globals()) - - class DSSIMMSEMaskLoss(object): - def __init__(self, mask, is_mse=False): - self.mask = mask - self.is_mse = is_mse - def __call__(self,y_true, y_pred): - total_loss = None - mask = self.mask - if self.is_mse: - blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask) - return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) ) - else: - return 10*dssim() (y_true*mask, y_pred*mask) - nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss - - - ''' - def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 
0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def func(input): - - - def ResnetBlock(dim): - def func(input): - x = input - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - return Add()([x,input]) - return func - - x = input - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(ngf, 7, 1, 'valid')(x) - - x = ReLU()(XNormalization(Conv2D(ngf*2, 4, 2, 'same')(x))) - x = ReLU()(XNormalization(Conv2D(ngf*4, 4, 2, 'same')(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4)(x) - - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf*2 *4, 3, 1, 'same')(x)))) - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf *4, 3, 1, 'same')(x)))) - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(output_nc, 7, 1, 'valid')(x) - x = tanh(x) - - return x - - return func - - nnlib.ResNet = ResNet - - # Defines the Unet generator. - # |num_downs|: number of downsamplings in UNet. 
For example, - # if |num_downs| == 7, image of size 128x128 will become of size 1x1 - # at the bottleneck - def UNet(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): - def func(inp): - x = inp - - x = Conv2D(inner_nc, 4, 2, 'valid')(ReflectionPadding2D( (1,1) )(x)) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - x = sub_model(x) - - if not outermost: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - if use_dropout: - x = Dropout(0.5)(x) - - x = Concatenate(axis=3)([inp, x]) - else: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = tanh(x) - - - return x - - return func - - def func(input): - - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=None, innermost=True) - - for i in range(num_downs - 5): - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=unet_block, use_dropout=use_dropout) - - unet_block = UNetSkipConnection(ngf * 4 , ngf * 8, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf * 2 , ngf * 4, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf , ngf * 2, sub_model=unet_block) - unet_block = UNetSkipConnection(output_nc, ngf , sub_model=unet_block, outermost=True) - - return unet_block(input) - return func - nnlib.UNet = UNet - - #predicts based on two 
past_image_tensors - def UNetTemporalPredictor(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - def func(inputs): - past_2_image_tensor, past_1_image_tensor = inputs - - x = Concatenate(axis=3)([ past_2_image_tensor, past_1_image_tensor ]) - x = UNet(3, use_batch_norm, num_downs=num_downs, ngf=ngf, use_dropout=use_dropout) (x) - - return x - - return func - nnlib.UNetTemporalPredictor = UNetTemporalPredictor - - def NLayerDiscriminator(use_batch_norm, ndf=64, n_layers=3): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def func(input): - x = input - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf, 4, 2, 'valid')(x) - x = LeakyReLU(0.2)(x) - - for i in range(1, n_layers): - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** i, 8), 4, 2, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** n_layers, 8), 4, 1, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return Conv2D( 1, 4, 1, 'valid')(x) - return func - nnlib.NLayerDiscriminator = NLayerDiscriminator - ''' - @staticmethod - def finalize_all(): - if nnlib.keras_contrib is not None: - nnlib.keras_contrib = None - - if nnlib.keras is not None: - nnlib.keras.backend.clear_session() - nnlib.keras = None - - if nnlib.tf is not None: - nnlib.tf_sess = None - nnlib.tf = None - - -class CAInitializerMPSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.floatx = client_dict['floatx'] - self.data_format = client_dict['data_format'] - - #override - def process_data(self, data): - idx, shape = data - weights = CAGenerateWeights (shape, self.floatx, self.data_format) - return idx, weights - - #override - def get_data_name (self, data): - #return string identificator of your data - return "undefined" - - #override - def __init__(self, idx_shapes_list, floatx, data_format ): - - self.idx_shapes_list = idx_shapes_list - self.floatx = floatx - self.data_format = data_format - - self.result = [] - super().__init__('CAInitializerMP', CAInitializerMPSubprocessor.Cli) - - #override - def on_clients_initialized(self): - io.progress_bar ("Initializing CA weights", len (self.idx_shapes_list)) - - #override - def 
on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(multiprocessing.cpu_count()): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'floatx' : self.floatx, - 'data_format' : self.data_format - } - - #override - def get_data(self, host_dict): - if len (self.idx_shapes_list) > 0: - return self.idx_shapes_list.pop(0) - - return None - - #override - def on_data_return (self, host_dict, data): - self.idx_shapes_list.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - self.result.append ( result ) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result +import os +import sys +import contextlib +import numpy as np + +from .CAInitializer import CAGenerateWeights +import multiprocessing +from joblib import Subprocessor + +from utils import std_utils +from .device import device +from interact import interact as io + +class nnlib(object): + device = device #forwards nnlib.devicelib to device in order to use nnlib as standalone lib + DeviceConfig = device.Config + active_DeviceConfig = DeviceConfig() #default is one best GPU + + backend = "" + + dlib = None + + keras = None + keras_contrib = None + + tf = None + tf_sess = None + + PML = None + PMLK = None + PMLTile= None + + code_import_keras = None + code_import_keras_contrib = None + code_import_all = None + + code_import_dlib = None + + + ResNet = None + UNet = None + UNetTemporalPredictor = None + NLayerDiscriminator = None + + code_import_keras_string = \ +""" +keras = nnlib.keras +K = keras.backend +KL = keras.layers + +Input = KL.Input + +Dense = KL.Dense +Conv2D = nnlib.Conv2D +Conv2DTranspose = nnlib.Conv2DTranspose +SeparableConv2D = KL.SeparableConv2D +MaxPooling2D = KL.MaxPooling2D +UpSampling2D = KL.UpSampling2D +BatchNormalization = KL.BatchNormalization + +LeakyReLU = KL.LeakyReLU +ReLU = KL.ReLU +PReLU = KL.PReLU +tanh = KL.Activation('tanh') +sigmoid = KL.Activation('sigmoid') +Dropout = KL.Dropout +Softmax = KL.Softmax + +Lambda = KL.Lambda +Add = KL.Add +Concatenate = KL.Concatenate + + +Flatten = KL.Flatten +Reshape = KL.Reshape + +ZeroPadding2D = KL.ZeroPadding2D + +RandomNormal = keras.initializers.RandomNormal +Model = keras.models.Model + +Adam = nnlib.Adam + +modelify = nnlib.modelify +gaussian_blur = nnlib.gaussian_blur +style_loss = nnlib.style_loss +dssim = nnlib.dssim + +PixelShuffler = nnlib.PixelShuffler +SubpixelUpscaler = nnlib.SubpixelUpscaler +Scale = nnlib.Scale + +CAInitializerMP = nnlib.CAInitializerMP + +#ReflectionPadding2D = nnlib.ReflectionPadding2D +#AddUniformNoise = nnlib.AddUniformNoise +""" + code_import_keras_contrib_string = \ +""" +keras_contrib = nnlib.keras_contrib +GroupNormalization = keras_contrib.layers.GroupNormalization +InstanceNormalization = keras_contrib.layers.InstanceNormalization +""" + code_import_dlib_string = \ +""" +dlib = nnlib.dlib +""" + + code_import_all_string = \ +""" +DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss +ResNet = nnlib.ResNet +UNet = nnlib.UNet +UNetTemporalPredictor = nnlib.UNetTemporalPredictor +NLayerDiscriminator = nnlib.NLayerDiscriminator +""" + + + @staticmethod + def _import_tf(device_config): + if nnlib.tf is not None: + return nnlib.code_import_tf + + if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': + suppressor = std_utils.suppress_stdout_stderr().__enter__() + else: + suppressor = None + + if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): + 
os.environ.pop('CUDA_VISIBLE_DEVICES') + + os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #tf log errors only + import tensorflow as tf + nnlib.tf = tf + + if device_config.cpu_only: + config = tf.ConfigProto(device_count={'GPU': 0}) + else: + config = tf.ConfigProto() + + if device_config.backend != "tensorflow-generic": + #tensorflow-generic is system with NVIDIA card, but w/o NVSMI + #so dont hide devices and let tensorflow to choose best card + visible_device_list = '' + for idx in device_config.gpu_idxs: + visible_device_list += str(idx) + ',' + config.gpu_options.visible_device_list=visible_device_list[:-1] + + config.gpu_options.force_gpu_compatible = True + config.gpu_options.allow_growth = device_config.allow_growth + + nnlib.tf_sess = tf.Session(config=config) + + if suppressor is not None: + suppressor.__exit__() + + @staticmethod + def import_keras(device_config): + if nnlib.keras is not None: + return nnlib.code_import_keras + + nnlib.backend = device_config.backend + if "tensorflow" in nnlib.backend: + nnlib._import_tf(device_config) + elif nnlib.backend == "plaidML": + os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" + os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] ) + + #if "tensorflow" in nnlib.backend: + # nnlib.keras = nnlib.tf.keras + #else: + import keras as keras_ + nnlib.keras = keras_ + + if 'KERAS_BACKEND' in os.environ: + os.environ.pop('KERAS_BACKEND') + + if nnlib.backend == "plaidML": + import plaidml + import plaidml.tile + nnlib.PML = plaidml + nnlib.PMLK = plaidml.keras.backend + nnlib.PMLTile = plaidml.tile + + if device_config.use_fp16: + nnlib.keras.backend.set_floatx('float16') + + if "tensorflow" in nnlib.backend: + nnlib.keras.backend.set_session(nnlib.tf_sess) + + nnlib.keras.backend.set_image_data_format('channels_last') + + nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec') + nnlib.__initialize_keras_functions() + + return nnlib.code_import_keras + + @staticmethod + def __initialize_keras_functions(): + keras = nnlib.keras + K = keras.backend + KL = keras.layers + backend = nnlib.backend + + def modelify(model_functor): + def func(tensor): + return keras.models.Model (tensor, model_functor(tensor)) + return func + + nnlib.modelify = modelify + + def gaussian_blur(radius=2.0): + def gaussian(x, mu, sigma): + return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) + + def make_kernel(sigma): + kernel_size = max(3, int(2 * 2 * sigma + 1)) + mean = np.floor(0.5 * kernel_size) + kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) + np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) + kernel = np_kernel / np.sum(np_kernel) + return kernel + + gauss_kernel = make_kernel(radius) + gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] + + def func(input): + inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ] + + outputs = [] + for i in range(len(inputs)): + outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ] + + return K.concatenate (outputs, axis=-1) + return func + nnlib.gaussian_blur = gaussian_blur + + def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1): + if gaussian_blur_radius > 0.0: + gblur = gaussian_blur(gaussian_blur_radius) + + def sd(content, style, loss_weight): + content_nc = K.int_shape(content)[-1] + style_nc = K.int_shape(style)[-1] + if content_nc != 
style_nc: + raise Exception("style_loss() content_nc != style_nc") + + axes = [1,2] + c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True) + s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True) + c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5) + + mean_loss = K.sum(K.square(c_mean-s_mean)) + std_loss = K.sum(K.square(c_std-s_std)) + + return (mean_loss + std_loss) * ( loss_weight / float(content_nc) ) + + def func(target, style): + if wnd_size == 0: + if gaussian_blur_radius > 0.0: + return sd( gblur(target), gblur(style), loss_weight=loss_weight) + else: + return sd( target, style, loss_weight=loss_weight ) + else: + #currently unused + if nnlib.tf is not None: + sh = K.int_shape(target)[1] + k = (sh-wnd_size) // step_size + 1 + if gaussian_blur_radius > 0.0: + target, style = gblur(target), gblur(style) + target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') + style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') + return sd( target, style, loss_weight ) + if nnlib.PML is not None: + print ("Sorry, plaidML backend does not support style_loss") + return 0 + return func + nnlib.style_loss = style_loss + + def dssim(kernel_size=11, k1=0.01, k2=0.03, max_value=1.0): + # port of tf.image.ssim to pure keras in order to work on plaidML backend. + + def func(y_true, y_pred): + ch = K.shape(y_pred)[-1] + + def _fspecial_gauss(size, sigma): + #Function to mimic the 'fspecial' gaussian MATLAB function. + coords = np.arange(0, size, dtype=K.floatx()) + coords -= (size - 1 ) / 2.0 + g = coords**2 + g *= ( -0.5 / (sigma**2) ) + g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) + g = K.constant ( np.reshape (g, (1,-1)) ) + g = K.softmax(g) + g = K.reshape (g, (size, size, 1, 1)) + g = K.tile (g, (1,1,ch,1)) + return g + + kernel = _fspecial_gauss(kernel_size,1.5) + + def reducer(x): + return K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid') + + c1 = (k1 * max_value) ** 2 + c2 = (k2 * max_value) ** 2 + + mean0 = reducer(y_true) + mean1 = reducer(y_pred) + num0 = mean0 * mean1 * 2.0 + den0 = K.square(mean0) + K.square(mean1) + luminance = (num0 + c1) / (den0 + c1) + + num1 = reducer(y_true * y_pred) * 2.0 + den1 = reducer(K.square(y_true) + K.square(y_pred)) + c2 *= 1.0 #compensation factor + cs = (num1 - num0 + c2) / (den1 - den0 + c2) + + ssim_val = K.mean(luminance * cs, axis=(-3, -2) ) + return(1.0 - ssim_val ) / 2.0 + + return func + + nnlib.dssim = dssim + + if 'tensorflow' in backend: + class PixelShuffler(keras.layers.Layer): + def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): + super(PixelShuffler, self).__init__(**kwargs) + self.data_format = data_format + self.size = size + + def call(self, inputs): + input_shape = K.shape(inputs) + if K.int_shape(input_shape)[0] != 4: + raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) + + if self.data_format == 'channels_first': + return K.tf.depth_to_space(inputs, self.size[0], 'NCHW') + + elif self.data_format == 'channels_last': + return K.tf.depth_to_space(inputs, self.size[0], 'NHWC') + + def compute_output_shape(self, input_shape): + if len(input_shape) != 4: + raise ValueError('Inputs should have rank ' + + str(4) + + '; Received input shape:', str(input_shape)) + + if self.data_format == 'channels_first': + height = input_shape[2] * self.size[0] if 
input_shape[2] is not None else None + width = input_shape[3] * self.size[1] if input_shape[3] is not None else None + channels = input_shape[1] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[1]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + channels, + height, + width) + + elif self.data_format == 'channels_last': + height = input_shape[1] * self.size[0] if input_shape[1] is not None else None + width = input_shape[2] * self.size[1] if input_shape[2] is not None else None + channels = input_shape[3] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[3]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + height, + width, + channels) + + def get_config(self): + config = {'size': self.size, + 'data_format': self.data_format} + base_config = super(PixelShuffler, self).get_config() + + return dict(list(base_config.items()) + list(config.items())) + else: + class PixelShuffler(KL.Layer): + def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): + super(PixelShuffler, self).__init__(**kwargs) + self.data_format = data_format + self.size = size + + def call(self, inputs): + + input_shape = K.shape(inputs) + if K.int_shape(input_shape)[0] != 4: + raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) + + if self.data_format == 'channels_first': + batch_size, c, h, w = input_shape[0], K.int_shape(inputs)[1], input_shape[2], input_shape[3] + rh, rw = self.size + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + + out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) + out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) + out = K.reshape(out, (batch_size, oc, oh, ow)) + return out + + elif self.data_format == 'channels_last': + batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] + rh, rw = self.size + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + + out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) + out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) + out = K.reshape(out, (batch_size, oh, ow, oc)) + return out + + def compute_output_shape(self, input_shape): + if len(input_shape) != 4: + raise ValueError('Inputs should have rank ' + + str(4) + + '; Received input shape:', str(input_shape)) + + if self.data_format == 'channels_first': + height = input_shape[2] * self.size[0] if input_shape[2] is not None else None + width = input_shape[3] * self.size[1] if input_shape[3] is not None else None + channels = input_shape[1] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[1]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + channels, + height, + width) + + elif self.data_format == 'channels_last': + height = input_shape[1] * self.size[0] if input_shape[1] is not None else None + width = input_shape[2] * self.size[1] if input_shape[2] is not None else None + channels = input_shape[3] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[3]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + height, + width, + channels) + + def get_config(self): + config = {'size': self.size, + 'data_format': self.data_format} + base_config = super(PixelShuffler, self).get_config() + + return dict(list(base_config.items()) + list(config.items())) + + 
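For review, the channels_last branch above can be replayed in plain numpy to see the rearrangement it performs (illustration only, not part of the patch; it is intended to mirror tf.depth_to_space with block size 2):

# Sketch, not part of the patch: the reshape/permute/reshape sequence from
# PixelShuffler.call() for data_format='channels_last', replayed in numpy.
import numpy as np

b, h, w, c = 1, 2, 2, 8                  # toy input: 2x2 spatial, 8 channels
rh, rw = 2, 2                            # the layer's `size` (upscale factors)
oc = c // (rh * rw)                      # output channels: 8 // 4 = 2

x = np.arange(b * h * w * c, dtype=np.float32).reshape(b, h, w, c)

out = x.reshape(b, h, w, rh, rw, oc)     # split channels into (rh, rw, oc)
out = out.transpose(0, 1, 3, 2, 4, 5)    # -> (b, h, rh, w, rw, oc)
out = out.reshape(b, h * rh, w * rw, oc) # interleave into the upscaled map

assert out.shape == (1, 4, 4, 2)         # 2x spatial upscale, 4x fewer channels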
nnlib.PixelShuffler = PixelShuffler
+ nnlib.SubpixelUpscaler = PixelShuffler
+
+ class Scale(KL.Layer):
+ """
+ GAN Custom Scale Layer
+ Code borrows from https://github.com/flyyufelix/cnn_finetune
+ """
+ def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs):
+ self.axis = axis
+ self.gamma_init = keras.initializers.get(gamma_init)
+ self.initial_weights = weights
+ super(Scale, self).__init__(**kwargs)
+
+ def build(self, input_shape):
+ self.input_spec = [keras.engine.InputSpec(shape=input_shape)]
+
+ # Compatibility with TensorFlow >= 1.0.0
+ self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name))
+ self.trainable_weights = [self.gamma]
+
+ if self.initial_weights is not None:
+ self.set_weights(self.initial_weights)
+ del self.initial_weights
+
+ def call(self, x, mask=None):
+ return self.gamma * x
+
+ def get_config(self):
+ config = {"axis": self.axis}
+ base_config = super(Scale, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
+ nnlib.Scale = Scale
+
+ class Adam(keras.optimizers.Optimizer):
+ """Adam optimizer.
+
+ Default parameters follow those provided in the original paper.
+
+ # Arguments
+ lr: float >= 0. Learning rate.
+ beta_1: float, 0 < beta < 1. Generally close to 1.
+ beta_2: float, 0 < beta < 1. Generally close to 1.
+ epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
+ decay: float >= 0. Learning rate decay over each update.
+ amsgrad: boolean. Whether to apply the AMSGrad variant of this
+ algorithm from the paper "On the Convergence of Adam and
+ Beyond".
+ tf_cpu_mode: only for tensorflow backend
+ 0 - default, no changes.
+ 1 - allows training a ~2x bigger network on the same VRAM, at the cost of extra RAM.
+ 2 - allows training a ~3x bigger network on the same VRAM, at the cost of ~2x RAM and extra CPU load.
+
+ # References
+ - [Adam - A Method for Stochastic Optimization]
+ (https://arxiv.org/abs/1412.6980v8)
+ - [On the Convergence of Adam and Beyond]
+ (https://openreview.net/forum?id=ryQu7f-RZ)
+ """
+
+ def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
+ epsilon=None, decay=0., amsgrad=False, tf_cpu_mode=0, **kwargs):
+ super(Adam, self).__init__(**kwargs)
+ with K.name_scope(self.__class__.__name__):
+ self.iterations = K.variable(0, dtype='int64', name='iterations')
+ self.lr = K.variable(lr, name='lr')
+ self.beta_1 = K.variable(beta_1, name='beta_1')
+ self.beta_2 = K.variable(beta_2, name='beta_2')
+ self.decay = K.variable(decay, name='decay')
+ if epsilon is None:
+ epsilon = K.epsilon()
+ self.epsilon = epsilon
+ self.initial_decay = decay
+ self.amsgrad = amsgrad
+ self.tf_cpu_mode = tf_cpu_mode
+
+ def get_updates(self, loss, params):
+ grads = self.get_gradients(loss, params)
+ self.updates = [K.update_add(self.iterations, 1)]
+
+ lr = self.lr
+ if self.initial_decay > 0:
+ lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
+ K.dtype(self.decay))))
+
+ t = K.cast(self.iterations, K.floatx()) + 1
+ lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
+ (1. - K.pow(self.beta_1, t)))
+
+ e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None
+ if e: e.__enter__()
+ ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
+ vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
+ if self.amsgrad:
+ vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
+ else:
+ vhats = [K.zeros(1) for _ in params]
+ if e: e.__exit__(None, None, None)
+
+ self.weights = [self.iterations] + ms + vs + vhats
+
+ for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
+ e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None
+ if e: e.__enter__()
+ m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
+ v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
+
+ if self.amsgrad:
+ vhat_t = K.maximum(vhat, v_t)
+ self.updates.append(K.update(vhat, vhat_t))
+ if e: e.__exit__(None, None, None)
+
+ if self.amsgrad:
+ p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
+ else:
+ p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
+
+ self.updates.append(K.update(m, m_t))
+ self.updates.append(K.update(v, v_t))
+ new_p = p_t
+
+ # Apply constraints.
+ if getattr(p, 'constraint', None) is not None:
+ new_p = p.constraint(new_p)
+
+ self.updates.append(K.update(p, new_p))
+ return self.updates
+
+ def get_config(self):
+ config = {'lr': float(K.get_value(self.lr)),
+ 'beta_1': float(K.get_value(self.beta_1)),
+ 'beta_2': float(K.get_value(self.beta_2)),
+ 'decay': float(K.get_value(self.decay)),
+ 'epsilon': self.epsilon,
+ 'amsgrad': self.amsgrad}
+ base_config = super(Adam, self).get_config()
+ return dict(list(base_config.items()) + list(config.items()))
+ nnlib.Adam = Adam
+
+ def CAInitializerMP( conv_weights_list ):
+ #Convolution Aware Initialization https://arxiv.org/abs/1702.06295
+ result = CAInitializerMPSubprocessor ( [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ], K.floatx(), K.image_data_format() ).run()
+ for idx, weights in result:
+ K.set_value ( conv_weights_list[idx], weights )
+ nnlib.CAInitializerMP = CAInitializerMP
+
+
+ if backend == "plaidML":
+ class TileOP_ReflectionPadding2D(nnlib.PMLTile.Operation):
+ def __init__(self, input, w_pad, h_pad):
+ if K.image_data_format() == 'channels_last':
+ if input.shape.ndims == 4:
+ H, W = input.shape.dims[1:3]
+ if (type(H) == int and h_pad >= H) or \
+ (type(W) == int and w_pad >= W):
+ raise ValueError("Paddings must be less than dimensions.")
+
+ c = """ function (I[B, H, W, C] ) -> (O) {{
+ WE = W + {w_pad}*2;
+ HE = H + {h_pad}*2;
+ """.format(h_pad=h_pad, w_pad=w_pad)
+ if w_pad > 0:
+ c += """
+ LEFT_PAD [b, h, w , c : B, H, WE, C ] = =(I[b, h, {w_pad}-w, c]), w < {w_pad} ;
+ HCENTER [b, h, w , c : B, H, WE, C ] = =(I[b, h, w-{w_pad}, c]), w < W+{w_pad}-1 ;
+ RIGHT_PAD[b, h, w , c : B, H, WE, C ] = =(I[b, h, 2*W - (w-{w_pad}) -2, c]);
+ LCR = LEFT_PAD+HCENTER+RIGHT_PAD;
+ """.format(h_pad=h_pad, w_pad=w_pad)
+ else:
+ c += "LCR = I;"
+
+ if h_pad > 0:
+ c += """
+ TOP_PAD [b, h, w , c : B, HE, WE, C ] = =(LCR[b, {h_pad}-h, w, c]), h < {h_pad};
+ VCENTER [b, h, w , c : B, HE, WE, C ] = =(LCR[b, h-{h_pad}, w, c]), h < H+{h_pad}-1 ;
+ BOTTOM_PAD[b, h, w , c : B, HE, WE, C ] = =(LCR[b, 2*H - (h-{h_pad}) -2, w, c]);
+ TVB = TOP_PAD+VCENTER+BOTTOM_PAD;
+ """.format(h_pad=h_pad, w_pad=w_pad)
+ else:
+ c += "TVB = LCR;"
+
+ c += "O = TVB; }"
+
+ inp_dims = input.shape.dims
+ out_dims = (inp_dims[0], inp_dims[1]+h_pad*2, inp_dims[2]+w_pad*2, inp_dims[3])
+ else:
+ raise NotImplementedError
+ else:
+ raise NotImplementedError
+
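+ # Reading the Tile program above (a sketch of its intent, not a formal
+ # account of Tile semantics): LEFT_PAD/RIGHT_PAD mirror the first and last
+ # w_pad columns around the edges without repeating the border pixel (the
+ # source index 2*W - (w-w_pad) - 2 reflects about column W-1), then
+ # TOP_PAD/BOTTOM_PAD do the same for rows, so the result should match the
+ # 'REFLECT' mode of K.tf.pad used on the tensorflow path below.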
super(TileOP_ReflectionPadding2D, self).__init__(c, [('I', input) ],
+ [('O', nnlib.PMLTile.Shape(input.shape.dtype, out_dims ) )])
+
+ class ReflectionPadding2D(keras.layers.Layer):
+ def __init__(self, padding=(1, 1), **kwargs):
+ self.padding = tuple(padding)
+ self.input_spec = [keras.layers.InputSpec(ndim=4)]
+ super(ReflectionPadding2D, self).__init__(**kwargs)
+
+ def compute_output_shape(self, s):
+ """Computes the output shape, assuming a "channels_last" configuration."""
+ return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])
+
+ def call(self, x, mask=None):
+ w_pad,h_pad = self.padding
+ if "tensorflow" in backend:
+ return K.tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT')
+ elif backend == "plaidML":
+ return TileOP_ReflectionPadding2D.function(x, self.padding[0], self.padding[1])
+ else:
+ if K.image_data_format() == 'channels_last':
+ if x.shape.ndims == 4:
+ w = K.concatenate ([ x[:,:,w_pad:0:-1,:],
+ x,
+ x[:,:,-2:-w_pad-2:-1,:] ], axis=2 )
+ h = K.concatenate ([ w[:,h_pad:0:-1,:,:],
+ w,
+ w[:,-2:-h_pad-2:-1,:,:] ], axis=1 )
+ return h
+ else:
+ raise NotImplementedError
+ else:
+ raise NotImplementedError
+
+ nnlib.ReflectionPadding2D = ReflectionPadding2D
+
+ class Conv2D():
+ def __init__ (self, *args, **kwargs):
+ self.reflect_pad = False
+ padding = kwargs.get('padding','')
+ if padding == 'zero':
+ kwargs['padding'] = 'same'
+ if padding == 'reflect':
+ kernel_size = kwargs['kernel_size']
+ if (kernel_size % 2) == 1:
+ self.pad = (kernel_size // 2,)*2
+ kwargs['padding'] = 'valid'
+ self.reflect_pad = True
+ self.func = keras.layers.Conv2D (*args, **kwargs)
+
+ def __call__(self,x):
+ if self.reflect_pad:
+ x = ReflectionPadding2D( self.pad ) (x)
+ return self.func(x)
+ nnlib.Conv2D = Conv2D
+
+ class Conv2DTranspose():
+ def __init__ (self, *args, **kwargs):
+ self.reflect_pad = False
+ padding = kwargs.get('padding','')
+ if padding == 'zero':
+ kwargs['padding'] = 'same'
+ if padding == 'reflect':
+ kernel_size = kwargs['kernel_size']
+ if (kernel_size % 2) == 1:
+ self.pad = (kernel_size // 2,)*2
+ kwargs['padding'] = 'valid'
+ self.reflect_pad = True
+ self.func = keras.layers.Conv2DTranspose (*args, **kwargs)
+
+ def __call__(self,x):
+ if self.reflect_pad:
+ x = ReflectionPadding2D( self.pad ) (x)
+ return self.func(x)
+ nnlib.Conv2DTranspose = Conv2DTranspose
+
+ @staticmethod
+ def import_keras_contrib(device_config):
+ if nnlib.keras_contrib is not None:
+ return nnlib.code_import_keras_contrib
+
+ import keras_contrib as keras_contrib_
+ nnlib.keras_contrib = keras_contrib_
+ nnlib.__initialize_keras_contrib_functions()
+ nnlib.code_import_keras_contrib = compile (nnlib.code_import_keras_contrib_string,'','exec')
+
+ @staticmethod
+ def __initialize_keras_contrib_functions():
+ pass
+
+ @staticmethod
+ def import_dlib( device_config = None):
+ if nnlib.dlib is not None:
+ return nnlib.code_import_dlib
+
+ import dlib as dlib_
+ nnlib.dlib = dlib_
+ if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0:
+ nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0])
+
+ nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec')
+
+ @staticmethod
+ def import_all(device_config = None):
+ if nnlib.code_import_all is None:
+ if device_config is None:
+ device_config = nnlib.active_DeviceConfig
+ else:
+ nnlib.active_DeviceConfig = device_config
+
+ nnlib.import_keras(device_config)
+ nnlib.import_keras_contrib(device_config)
+ nnlib.code_import_all = compile 
(nnlib.code_import_keras_string + '\n' + + nnlib.code_import_keras_contrib_string + + nnlib.code_import_all_string,'','exec') + nnlib.__initialize_all_functions() + + return nnlib.code_import_all + + @staticmethod + def __initialize_all_functions(): + exec (nnlib.import_keras(nnlib.active_DeviceConfig), locals(), globals()) + exec (nnlib.import_keras_contrib(nnlib.active_DeviceConfig), locals(), globals()) + + class DSSIMMSEMaskLoss(object): + def __init__(self, mask, is_mse=False): + self.mask = mask + self.is_mse = is_mse + def __call__(self,y_true, y_pred): + total_loss = None + mask = self.mask + if self.is_mse: + blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask) + return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) ) + else: + return 10*dssim() (y_true*mask, y_pred*mask) + nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss + + + ''' + def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) + + def func(input): + + + def ResnetBlock(dim): + def func(input): + x = input + + x = ReflectionPadding2D((1,1))(x) + x = Conv2D(dim, 3, 1, padding='valid')(x) + x = XNormalization(x) + x = ReLU()(x) + + if use_dropout: + x = Dropout(0.5)(x) + + x = ReflectionPadding2D((1,1))(x) + x = Conv2D(dim, 3, 1, padding='valid')(x) + x = XNormalization(x) + x = ReLU()(x) + return Add()([x,input]) + return func + + x = input + + x = ReflectionPadding2D((3,3))(x) + x = Conv2D(ngf, 7, 1, 'valid')(x) 
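+ # Note: this commented-out generator appears to follow the classic
+ # Johnson/CycleGAN layout, i.e. a reflection-padded 7x7 stem (above), two
+ # stride-2 downsampling convs, n_blocks residual blocks, and then
+ # PixelShuffler-based upsampling back to the input resolution (below).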
+ + x = ReLU()(XNormalization(Conv2D(ngf*2, 4, 2, 'same')(x))) + x = ReLU()(XNormalization(Conv2D(ngf*4, 4, 2, 'same')(x))) + + for i in range(n_blocks): + x = ResnetBlock(ngf*4)(x) + + x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf*2 *4, 3, 1, 'same')(x)))) + x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf *4, 3, 1, 'same')(x)))) + + x = ReflectionPadding2D((3,3))(x) + x = Conv2D(output_nc, 7, 1, 'valid')(x) + x = tanh(x) + + return x + + return func + + nnlib.ResNet = ResNet + + # Defines the Unet generator. + # |num_downs|: number of downsamplings in UNet. For example, + # if |num_downs| == 7, image of size 128x128 will become of size 1x1 + # at the bottleneck + def UNet(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) + + def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): + def func(inp): + x = inp + + x = Conv2D(inner_nc, 4, 2, 'valid')(ReflectionPadding2D( (1,1) )(x)) + x = XNormalization(x) + x = ReLU()(x) + + if not innermost: + x = sub_model(x) + + if not outermost: + x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) + x = XNormalization(x) + x = ReLU()(x) + + if not innermost: + if use_dropout: + x = Dropout(0.5)(x) + + x = Concatenate(axis=3)([inp, x]) + else: + x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) + x = tanh(x) + + + return x + + return func + + def func(input): + + unet_block = UNetSkipConnection(ngf * 8, ngf * 8, 
sub_model=None, innermost=True) + + for i in range(num_downs - 5): + unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=unet_block, use_dropout=use_dropout) + + unet_block = UNetSkipConnection(ngf * 4 , ngf * 8, sub_model=unet_block) + unet_block = UNetSkipConnection(ngf * 2 , ngf * 4, sub_model=unet_block) + unet_block = UNetSkipConnection(ngf , ngf * 2, sub_model=unet_block) + unet_block = UNetSkipConnection(output_nc, ngf , sub_model=unet_block, outermost=True) + + return unet_block(input) + return func + nnlib.UNet = UNet + + #predicts based on two past_image_tensors + def UNetTemporalPredictor(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): + exec (nnlib.import_all(), locals(), globals()) + def func(inputs): + past_2_image_tensor, past_1_image_tensor = inputs + + x = Concatenate(axis=3)([ past_2_image_tensor, past_1_image_tensor ]) + x = UNet(3, use_batch_norm, num_downs=num_downs, ngf=ngf, use_dropout=use_dropout) (x) + + return x + + return func + nnlib.UNetTemporalPredictor = UNetTemporalPredictor + + def NLayerDiscriminator(use_batch_norm, ndf=64, n_layers=3): + exec (nnlib.import_all(), locals(), globals()) + + if not use_batch_norm: + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) + else: + use_bias = False + def XNormalization(x): + return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def func(input): + x = input + + x = ZeroPadding2D((1,1))(x) + x = Conv2D( ndf, 4, 2, 'valid')(x) + x = LeakyReLU(0.2)(x) + + for i in range(1, n_layers): + x = ZeroPadding2D((1,1))(x) + x = Conv2D( ndf * min(2 ** i, 8), 4, 2, 'valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + x = Conv2D( ndf * min(2 ** n_layers, 8), 4, 1, 'valid')(x) + x = XNormalization(x) + x = LeakyReLU(0.2)(x) + + x = ZeroPadding2D((1,1))(x) + return Conv2D( 1, 4, 1, 'valid')(x) + return func + nnlib.NLayerDiscriminator = NLayerDiscriminator + ''' + @staticmethod + def finalize_all(): + if nnlib.keras_contrib is not None: + nnlib.keras_contrib = None + + if nnlib.keras is not None: + nnlib.keras.backend.clear_session() + nnlib.keras = None + + if nnlib.tf is not None: + nnlib.tf_sess = None + nnlib.tf = None + + +class CAInitializerMPSubprocessor(Subprocessor): + class Cli(Subprocessor.Cli): + + #override + def on_initialize(self, client_dict): + self.floatx = client_dict['floatx'] + self.data_format = client_dict['data_format'] + + #override + def process_data(self, data): + idx, shape = data + weights = CAGenerateWeights (shape, self.floatx, self.data_format) 
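+ # CAGenerateWeights (implemented elsewhere in nnlib) is the CPU-heavy
+ # step, so each (index, shape) job runs in its own worker process; only
+ # the finished weight array is sent back to the host, which applies it
+ # via K.set_value in CAInitializerMP above.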
+ return idx, weights
+
+ #override
+ def get_data_name (self, data):
+ #return a string identifier for your data
+ return "undefined"
+
+ #override
+ def __init__(self, idx_shapes_list, floatx, data_format ):
+
+ self.idx_shapes_list = idx_shapes_list
+ self.floatx = floatx
+ self.data_format = data_format
+
+ self.result = []
+ super().__init__('CAInitializerMP', CAInitializerMPSubprocessor.Cli)
+
+ #override
+ def on_clients_initialized(self):
+ io.progress_bar ("Initializing CA weights", len (self.idx_shapes_list))
+
+ #override
+ def on_clients_finalized(self):
+ io.progress_bar_close()
+
+ #override
+ def process_info_generator(self):
+ for i in range(multiprocessing.cpu_count()):
+ yield 'CPU%d' % (i), {}, {'device_idx': i,
+ 'device_name': 'CPU%d' % (i),
+ 'floatx' : self.floatx,
+ 'data_format' : self.data_format
+ }
+
+ #override
+ def get_data(self, host_dict):
+ if len (self.idx_shapes_list) > 0:
+ return self.idx_shapes_list.pop(0)
+
+ return None
+
+ #override
+ def on_data_return (self, host_dict, data):
+ self.idx_shapes_list.insert(0, data)
+
+ #override
+ def on_result (self, host_dict, data, result):
+ self.result.append ( result )
+ io.progress_bar_inc(1)
+
+ #override
+ def get_result(self):
+ return self.result
diff --git a/nnlib/pynvml.py b/nnlib/pynvml.py
index 5923f23..5cc5a50 100644
--- a/nnlib/pynvml.py
+++ b/nnlib/pynvml.py
@@ -1,1727 +1,1727 @@
-#####
-# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the NVIDIA Corporation nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-##### - -## -# Python bindings for the NVML library -## -from ctypes import * -from ctypes.util import find_library -import sys -import os -import threading -import string - -## C Type mappings ## -## Enums -_nvmlEnableState_t = c_uint -NVML_FEATURE_DISABLED = 0 -NVML_FEATURE_ENABLED = 1 - -_nvmlBrandType_t = c_uint -NVML_BRAND_UNKNOWN = 0 -NVML_BRAND_QUADRO = 1 -NVML_BRAND_TESLA = 2 -NVML_BRAND_NVS = 3 -NVML_BRAND_GRID = 4 -NVML_BRAND_GEFORCE = 5 -NVML_BRAND_COUNT = 6 - -_nvmlTemperatureThresholds_t = c_uint -NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0 -NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1 -NVML_TEMPERATURE_THRESHOLD_COUNT = 1 - -_nvmlTemperatureSensors_t = c_uint -NVML_TEMPERATURE_GPU = 0 -NVML_TEMPERATURE_COUNT = 1 - -_nvmlComputeMode_t = c_uint -NVML_COMPUTEMODE_DEFAULT = 0 -NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 -NVML_COMPUTEMODE_PROHIBITED = 2 -NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 -NVML_COMPUTEMODE_COUNT = 4 - -_nvmlMemoryLocation_t = c_uint -NVML_MEMORY_LOCATION_L1_CACHE = 0 -NVML_MEMORY_LOCATION_L2_CACHE = 1 -NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2 -NVML_MEMORY_LOCATION_REGISTER_FILE = 3 -NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4 -NVML_MEMORY_LOCATION_COUNT = 5 - -# These are deprecated, instead use _nvmlMemoryErrorType_t -_nvmlEccBitType_t = c_uint -NVML_SINGLE_BIT_ECC = 0 -NVML_DOUBLE_BIT_ECC = 1 -NVML_ECC_ERROR_TYPE_COUNT = 2 - -_nvmlEccCounterType_t = c_uint -NVML_VOLATILE_ECC = 0 -NVML_AGGREGATE_ECC = 1 -NVML_ECC_COUNTER_TYPE_COUNT = 2 - -_nvmlMemoryErrorType_t = c_uint -NVML_MEMORY_ERROR_TYPE_CORRECTED = 0 -NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1 -NVML_MEMORY_ERROR_TYPE_COUNT = 2 - -_nvmlClockType_t = c_uint -NVML_CLOCK_GRAPHICS = 0 -NVML_CLOCK_SM = 1 -NVML_CLOCK_MEM = 2 -NVML_CLOCK_COUNT = 3 - -_nvmlDriverModel_t = c_uint -NVML_DRIVER_WDDM = 0 -NVML_DRIVER_WDM = 1 - -_nvmlPstates_t = c_uint -NVML_PSTATE_0 = 0 -NVML_PSTATE_1 = 1 -NVML_PSTATE_2 = 2 -NVML_PSTATE_3 = 3 -NVML_PSTATE_4 = 4 -NVML_PSTATE_5 = 5 -NVML_PSTATE_6 = 6 -NVML_PSTATE_7 = 7 -NVML_PSTATE_8 = 8 -NVML_PSTATE_9 = 9 -NVML_PSTATE_10 = 10 -NVML_PSTATE_11 = 11 -NVML_PSTATE_12 = 12 -NVML_PSTATE_13 = 13 -NVML_PSTATE_14 = 14 -NVML_PSTATE_15 = 15 -NVML_PSTATE_UNKNOWN = 32 - -_nvmlInforomObject_t = c_uint -NVML_INFOROM_OEM = 0 -NVML_INFOROM_ECC = 1 -NVML_INFOROM_POWER = 2 -NVML_INFOROM_COUNT = 3 - -_nvmlReturn_t = c_uint -NVML_SUCCESS = 0 -NVML_ERROR_UNINITIALIZED = 1 -NVML_ERROR_INVALID_ARGUMENT = 2 -NVML_ERROR_NOT_SUPPORTED = 3 -NVML_ERROR_NO_PERMISSION = 4 -NVML_ERROR_ALREADY_INITIALIZED = 5 -NVML_ERROR_NOT_FOUND = 6 -NVML_ERROR_INSUFFICIENT_SIZE = 7 -NVML_ERROR_INSUFFICIENT_POWER = 8 -NVML_ERROR_DRIVER_NOT_LOADED = 9 -NVML_ERROR_TIMEOUT = 10 -NVML_ERROR_IRQ_ISSUE = 11 -NVML_ERROR_LIBRARY_NOT_FOUND = 12 -NVML_ERROR_FUNCTION_NOT_FOUND = 13 -NVML_ERROR_CORRUPTED_INFOROM = 14 -NVML_ERROR_GPU_IS_LOST = 15 -NVML_ERROR_RESET_REQUIRED = 16 -NVML_ERROR_OPERATING_SYSTEM = 17 -NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18 -NVML_ERROR_UNKNOWN = 999 - -_nvmlFanState_t = c_uint -NVML_FAN_NORMAL = 0 -NVML_FAN_FAILED = 1 - -_nvmlLedColor_t = c_uint -NVML_LED_COLOR_GREEN = 0 -NVML_LED_COLOR_AMBER = 1 - -_nvmlGpuOperationMode_t = c_uint -NVML_GOM_ALL_ON = 0 -NVML_GOM_COMPUTE = 1 -NVML_GOM_LOW_DP = 2 - -_nvmlPageRetirementCause_t = c_uint -NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 0 -NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 1 -NVML_PAGE_RETIREMENT_CAUSE_COUNT = 2 - -_nvmlRestrictedAPI_t = c_uint -NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0 -NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1 -NVML_RESTRICTED_API_COUNT = 
2 - -_nvmlBridgeChipType_t = c_uint -NVML_BRIDGE_CHIP_PLX = 0 -NVML_BRIDGE_CHIP_BRO4 = 1 -NVML_MAX_PHYSICAL_BRIDGE = 128 - -_nvmlValueType_t = c_uint -NVML_VALUE_TYPE_DOUBLE = 0 -NVML_VALUE_TYPE_UNSIGNED_INT = 1 -NVML_VALUE_TYPE_UNSIGNED_LONG = 2 -NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3 -NVML_VALUE_TYPE_COUNT = 4 - -_nvmlPerfPolicyType_t = c_uint -NVML_PERF_POLICY_POWER = 0 -NVML_PERF_POLICY_THERMAL = 1 -NVML_PERF_POLICY_COUNT = 2 - -_nvmlSamplingType_t = c_uint -NVML_TOTAL_POWER_SAMPLES = 0 -NVML_GPU_UTILIZATION_SAMPLES = 1 -NVML_MEMORY_UTILIZATION_SAMPLES = 2 -NVML_ENC_UTILIZATION_SAMPLES = 3 -NVML_DEC_UTILIZATION_SAMPLES = 4 -NVML_PROCESSOR_CLK_SAMPLES = 5 -NVML_MEMORY_CLK_SAMPLES = 6 -NVML_SAMPLINGTYPE_COUNT = 7 - -_nvmlPcieUtilCounter_t = c_uint -NVML_PCIE_UTIL_TX_BYTES = 0 -NVML_PCIE_UTIL_RX_BYTES = 1 -NVML_PCIE_UTIL_COUNT = 2 - -_nvmlGpuTopologyLevel_t = c_uint -NVML_TOPOLOGY_INTERNAL = 0 -NVML_TOPOLOGY_SINGLE = 10 -NVML_TOPOLOGY_MULTIPLE = 20 -NVML_TOPOLOGY_HOSTBRIDGE = 30 -NVML_TOPOLOGY_CPU = 40 -NVML_TOPOLOGY_SYSTEM = 50 - -# C preprocessor defined values -nvmlFlagDefault = 0 -nvmlFlagForce = 1 - -# buffer size -NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 -NVML_DEVICE_UUID_BUFFER_SIZE = 80 -NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 -NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 -NVML_DEVICE_NAME_BUFFER_SIZE = 64 -NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 -NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 -NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = 16 - -NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) -NVML_VALUE_NOT_AVAILABLE_uint = c_uint(-1) - -## Lib loading ## -nvmlLib = None -libLoadLock = threading.Lock() -_nvmlLib_refcount = 0 # Incremented on each nvmlInit and decremented on nvmlShutdown - -## Error Checking ## -class NVMLError(Exception): - _valClassMapping = dict() - # List of currently known error codes - _errcode_to_string = { - NVML_ERROR_UNINITIALIZED: "Uninitialized", - NVML_ERROR_INVALID_ARGUMENT: "Invalid Argument", - NVML_ERROR_NOT_SUPPORTED: "Not Supported", - NVML_ERROR_NO_PERMISSION: "Insufficient Permissions", - NVML_ERROR_ALREADY_INITIALIZED: "Already Initialized", - NVML_ERROR_NOT_FOUND: "Not Found", - NVML_ERROR_INSUFFICIENT_SIZE: "Insufficient Size", - NVML_ERROR_INSUFFICIENT_POWER: "Insufficient External Power", - NVML_ERROR_DRIVER_NOT_LOADED: "Driver Not Loaded", - NVML_ERROR_TIMEOUT: "Timeout", - NVML_ERROR_IRQ_ISSUE: "Interrupt Request Issue", - NVML_ERROR_LIBRARY_NOT_FOUND: "NVML Shared Library Not Found", - NVML_ERROR_FUNCTION_NOT_FOUND: "Function Not Found", - NVML_ERROR_CORRUPTED_INFOROM: "Corrupted infoROM", - NVML_ERROR_GPU_IS_LOST: "GPU is lost", - NVML_ERROR_RESET_REQUIRED: "GPU requires restart", - NVML_ERROR_OPERATING_SYSTEM: "The operating system has blocked the request.", - NVML_ERROR_LIB_RM_VERSION_MISMATCH: "RM has detected an NVML/RM version mismatch.", - NVML_ERROR_UNKNOWN: "Unknown Error", - } - def __new__(typ, value): - ''' - Maps value to a proper subclass of NVMLError. 
- See _extractNVMLErrorsAsClasses function for more details - ''' - if typ == NVMLError: - typ = NVMLError._valClassMapping.get(value, typ) - obj = Exception.__new__(typ) - obj.value = value - return obj - def __str__(self): - try: - if self.value not in NVMLError._errcode_to_string: - NVMLError._errcode_to_string[self.value] = str(nvmlErrorString(self.value)) - return NVMLError._errcode_to_string[self.value] - except NVMLError_Uninitialized: - return "NVML Error with code %d" % self.value - def __eq__(self, other): - return self.value == other.value - -def _extractNVMLErrorsAsClasses(): - ''' - Generates a hierarchy of classes on top of NVMLError class. - - Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate - exceptions more easily. - - NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass. - e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized - ''' - this_module = sys.modules[__name__] - nvmlErrorsNames = filter(lambda x: x.startswith("NVML_ERROR_"), dir(this_module)) - for err_name in nvmlErrorsNames: - # e.g. Turn NVML_ERROR_ALREADY_INITIALIZED into NVMLError_AlreadyInitialized - class_name = "NVMLError_" + string.capwords(err_name.replace("NVML_ERROR_", ""), "_").replace("_", "") - err_val = getattr(this_module, err_name) - def gen_new(val): - def new(typ): - obj = NVMLError.__new__(typ, val) - return obj - return new - new_error_class = type(class_name, (NVMLError,), {'__new__': gen_new(err_val)}) - new_error_class.__module__ = __name__ - setattr(this_module, class_name, new_error_class) - NVMLError._valClassMapping[err_val] = new_error_class -_extractNVMLErrorsAsClasses() - -def _nvmlCheckReturn(ret): - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - return ret - -## Function access ## -_nvmlGetFunctionPointer_cache = dict() # function pointers are cached to prevent unnecessary libLoadLock locking -def _nvmlGetFunctionPointer(name): - global nvmlLib - - if name in _nvmlGetFunctionPointer_cache: - return _nvmlGetFunctionPointer_cache[name] - - libLoadLock.acquire() - try: - # ensure library was loaded - if (nvmlLib == None): - raise NVMLError(NVML_ERROR_UNINITIALIZED) - try: - _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name) - return _nvmlGetFunctionPointer_cache[name] - except AttributeError: - raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND) - finally: - # lock is always freed - libLoadLock.release() - -## Alternative object -# Allows the object to be printed -# Allows mismatched types to be assigned -# - like None when the Structure variant requires c_uint -class nvmlFriendlyObject(object): - def __init__(self, dictionary): - for x in dictionary: - setattr(self, x, dictionary[x]) - def __str__(self): - return self.__dict__.__str__() - -def nvmlStructToFriendlyObject(struct): - d = {} - for x in struct._fields_: - key = x[0] - value = getattr(struct, key) - d[key] = value - obj = nvmlFriendlyObject(d) - return obj - -# pack the object so it can be passed to the NVML library -def nvmlFriendlyObjectToStruct(obj, model): - for x in model._fields_: - key = x[0] - value = obj.__dict__[key] - setattr(model, key, value) - return model - -## Unit structures -class struct_c_nvmlUnit_t(Structure): - pass # opaque handle -c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) - -class _PrintableStructure(Structure): - """ - Abstract class that produces nicer __str__ output than ctypes.Structure. - e.g. 
instead of: - >>> print str(obj) - - this class will print - class_name(field_name: formatted_value, field_name: formatted_value) - - _fmt_ dictionary of -> - e.g. class that has _field_ 'hex_value', c_uint could be formatted with - _fmt_ = {"hex_value" : "%08X"} - to produce nicer output. - Default fomratting string for all fields can be set with key "" like: - _fmt_ = {"" : "%d MHz"} # e.g all values are numbers in MHz. - If not set it's assumed to be just "%s" - - Exact format of returned str from this class is subject to change in the future. - """ - _fmt_ = {} - def __str__(self): - result = [] - for x in self._fields_: - key = x[0] - value = getattr(self, key) - fmt = "%s" - if key in self._fmt_: - fmt = self._fmt_[key] - elif "" in self._fmt_: - fmt = self._fmt_[""] - result.append(("%s: " + fmt) % (key, value)) - return self.__class__.__name__ + "(" + string.join(result, ", ") + ")" - -class c_nvmlUnitInfo_t(_PrintableStructure): - _fields_ = [ - ('name', c_char * 96), - ('id', c_char * 96), - ('serial', c_char * 96), - ('firmwareVersion', c_char * 96), - ] - -class c_nvmlLedState_t(_PrintableStructure): - _fields_ = [ - ('cause', c_char * 256), - ('color', _nvmlLedColor_t), - ] - -class c_nvmlPSUInfo_t(_PrintableStructure): - _fields_ = [ - ('state', c_char * 256), - ('current', c_uint), - ('voltage', c_uint), - ('power', c_uint), - ] - -class c_nvmlUnitFanInfo_t(_PrintableStructure): - _fields_ = [ - ('speed', c_uint), - ('state', _nvmlFanState_t), - ] - -class c_nvmlUnitFanSpeeds_t(_PrintableStructure): - _fields_ = [ - ('fans', c_nvmlUnitFanInfo_t * 24), - ('count', c_uint) - ] - -## Device structures -class struct_c_nvmlDevice_t(Structure): - pass # opaque handle -c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) - -class nvmlPciInfo_t(_PrintableStructure): - _fields_ = [ - ('busId', c_char * 16), - ('domain', c_uint), - ('bus', c_uint), - ('device', c_uint), - ('pciDeviceId', c_uint), - - # Added in 2.285 - ('pciSubSystemId', c_uint), - ('reserved0', c_uint), - ('reserved1', c_uint), - ('reserved2', c_uint), - ('reserved3', c_uint), - ] - _fmt_ = { - 'domain' : "0x%04X", - 'bus' : "0x%02X", - 'device' : "0x%02X", - 'pciDeviceId' : "0x%08X", - 'pciSubSystemId' : "0x%08X", - } - -class c_nvmlMemory_t(_PrintableStructure): - _fields_ = [ - ('total', c_ulonglong), - ('free', c_ulonglong), - ('used', c_ulonglong), - ] - _fmt_ = {'': "%d B"} - -class c_nvmlBAR1Memory_t(_PrintableStructure): - _fields_ = [ - ('bar1Total', c_ulonglong), - ('bar1Free', c_ulonglong), - ('bar1Used', c_ulonglong), - ] - _fmt_ = {'': "%d B"} - -# On Windows with the WDDM driver, usedGpuMemory is reported as None -# Code that processes this structure should check for None, I.E. 
-# -# if (info.usedGpuMemory == None): -# # TODO handle the error -# pass -# else: -# print("Using %d MiB of memory" % (info.usedGpuMemory / 1024 / 1024)) -# -# See NVML documentation for more information -class c_nvmlProcessInfo_t(_PrintableStructure): - _fields_ = [ - ('pid', c_uint), - ('usedGpuMemory', c_ulonglong), - ] - _fmt_ = {'usedGpuMemory': "%d B"} - -class c_nvmlBridgeChipInfo_t(_PrintableStructure): - _fields_ = [ - ('type', _nvmlBridgeChipType_t), - ('fwVersion', c_uint), - ] - -class c_nvmlBridgeChipHierarchy_t(_PrintableStructure): - _fields_ = [ - ('bridgeCount', c_uint), - ('bridgeChipInfo', c_nvmlBridgeChipInfo_t * 128), - ] - -class c_nvmlEccErrorCounts_t(_PrintableStructure): - _fields_ = [ - ('l1Cache', c_ulonglong), - ('l2Cache', c_ulonglong), - ('deviceMemory', c_ulonglong), - ('registerFile', c_ulonglong), - ] - -class c_nvmlUtilization_t(_PrintableStructure): - _fields_ = [ - ('gpu', c_uint), - ('memory', c_uint), - ] - _fmt_ = {'': "%d %%"} - -# Added in 2.285 -class c_nvmlHwbcEntry_t(_PrintableStructure): - _fields_ = [ - ('hwbcId', c_uint), - ('firmwareVersion', c_char * 32), - ] - -class c_nvmlValue_t(Union): - _fields_ = [ - ('dVal', c_double), - ('uiVal', c_uint), - ('ulVal', c_ulong), - ('ullVal', c_ulonglong), - ] - -class c_nvmlSample_t(_PrintableStructure): - _fields_ = [ - ('timeStamp', c_ulonglong), - ('sampleValue', c_nvmlValue_t), - ] - -class c_nvmlViolationTime_t(_PrintableStructure): - _fields_ = [ - ('referenceTime', c_ulonglong), - ('violationTime', c_ulonglong), - ] - -## Event structures -class struct_c_nvmlEventSet_t(Structure): - pass # opaque handle -c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) - -nvmlEventTypeSingleBitEccError = 0x0000000000000001 -nvmlEventTypeDoubleBitEccError = 0x0000000000000002 -nvmlEventTypePState = 0x0000000000000004 -nvmlEventTypeXidCriticalError = 0x0000000000000008 -nvmlEventTypeClock = 0x0000000000000010 -nvmlEventTypeNone = 0x0000000000000000 -nvmlEventTypeAll = ( - nvmlEventTypeNone | - nvmlEventTypeSingleBitEccError | - nvmlEventTypeDoubleBitEccError | - nvmlEventTypePState | - nvmlEventTypeClock | - nvmlEventTypeXidCriticalError - ) - -## Clock Throttle Reasons defines -nvmlClocksThrottleReasonGpuIdle = 0x0000000000000001 -nvmlClocksThrottleReasonApplicationsClocksSetting = 0x0000000000000002 -nvmlClocksThrottleReasonUserDefinedClocks = nvmlClocksThrottleReasonApplicationsClocksSetting # deprecated, use nvmlClocksThrottleReasonApplicationsClocksSetting -nvmlClocksThrottleReasonSwPowerCap = 0x0000000000000004 -nvmlClocksThrottleReasonHwSlowdown = 0x0000000000000008 -nvmlClocksThrottleReasonUnknown = 0x8000000000000000 -nvmlClocksThrottleReasonNone = 0x0000000000000000 -nvmlClocksThrottleReasonAll = ( - nvmlClocksThrottleReasonNone | - nvmlClocksThrottleReasonGpuIdle | - nvmlClocksThrottleReasonApplicationsClocksSetting | - nvmlClocksThrottleReasonSwPowerCap | - nvmlClocksThrottleReasonHwSlowdown | - nvmlClocksThrottleReasonUnknown - ) - -class c_nvmlEventData_t(_PrintableStructure): - _fields_ = [ - ('device', c_nvmlDevice_t), - ('eventType', c_ulonglong), - ('eventData', c_ulonglong) - ] - _fmt_ = {'eventType': "0x%08X"} - -class c_nvmlAccountingStats_t(_PrintableStructure): - _fields_ = [ - ('gpuUtilization', c_uint), - ('memoryUtilization', c_uint), - ('maxMemoryUsage', c_ulonglong), - ('time', c_ulonglong), - ('startTime', c_ulonglong), - ('isRunning', c_uint), - ('reserved', c_uint * 5) - ] - -## C function wrappers ## -def nvmlInit(): - _LoadNvmlLibrary() - - # - # Initialize the library - # - fn 
= _nvmlGetFunctionPointer("nvmlInit_v2") - ret = fn() - _nvmlCheckReturn(ret) - - # Atomically update refcount - global _nvmlLib_refcount - libLoadLock.acquire() - _nvmlLib_refcount += 1 - libLoadLock.release() - return None - -def _LoadNvmlLibrary(): - ''' - Load the library if it isn't loaded already - ''' - global nvmlLib - - if (nvmlLib == None): - # lock to ensure only one caller loads the library - libLoadLock.acquire() - - try: - # ensure the library still isn't loaded - if (nvmlLib == None): - try: - if (sys.platform[:3] == "win"): - searchPaths = [ - os.path.join(os.getenv("ProgramFiles", r"C:\Program Files"), r"NVIDIA Corporation\NVSMI\nvml.dll"), - os.path.join(os.getenv("WinDir", r"C:\Windows"), r"System32\nvml.dll"), - ] - nvmlPath = next((x for x in searchPaths if os.path.isfile(x)), None) - if (nvmlPath == None): - _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) - else: - # cdecl calling convention - nvmlLib = CDLL(nvmlPath) - else: - # assume linux - nvmlLib = CDLL("libnvidia-ml.so.1") - except OSError as ose: - _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) - if (nvmlLib == None): - _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) - finally: - # lock is always freed - libLoadLock.release() - -def nvmlShutdown(): - # - # Leave the library loaded, but shutdown the interface - # - fn = _nvmlGetFunctionPointer("nvmlShutdown") - ret = fn() - _nvmlCheckReturn(ret) - - # Atomically update refcount - global _nvmlLib_refcount - libLoadLock.acquire() - if (0 < _nvmlLib_refcount): - _nvmlLib_refcount -= 1 - libLoadLock.release() - return None - -# Added in 2.285 -def nvmlErrorString(result): - fn = _nvmlGetFunctionPointer("nvmlErrorString") - fn.restype = c_char_p # otherwise return is an int - ret = fn(result) - return ret - -# Added in 2.285 -def nvmlSystemGetNVMLVersion(): - c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetProcessName(pid): - c_name = create_string_buffer(1024) - fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") - ret = fn(c_uint(pid), c_name, c_uint(1024)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlSystemGetDriverVersion(): - c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetHicVersion(): - c_count = c_uint(0) - hics = None - fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") - - # get the count - ret = fn(byref(c_count), None) - - # this should only fail with insufficient size - if ((ret != NVML_SUCCESS) and - (ret != NVML_ERROR_INSUFFICIENT_SIZE)): - raise NVMLError(ret) - - # if there are no hics - if (c_count.value == 0): - return [] - - hic_array = c_nvmlHwbcEntry_t * c_count.value - hics = hic_array() - ret = fn(byref(c_count), hics) - _nvmlCheckReturn(ret) - return hics - -## Unit get functions -def nvmlUnitGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetHandleByIndex(index): - c_index = c_uint(index) - unit = c_nvmlUnit_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") - ret = fn(c_index, byref(unit)) - _nvmlCheckReturn(ret) - 
return unit - -def nvmlUnitGetUnitInfo(unit): - c_info = c_nvmlUnitInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetLedState(unit): - c_state = c_nvmlLedState_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") - ret = fn(unit, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state - -def nvmlUnitGetPsuInfo(unit): - c_info = c_nvmlPSUInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetTemperature(unit, type): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") - ret = fn(unit, c_uint(type), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -def nvmlUnitGetFanSpeedInfo(unit): - c_speeds = c_nvmlUnitFanSpeeds_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") - ret = fn(unit, byref(c_speeds)) - _nvmlCheckReturn(ret) - return c_speeds - -# added to API -def nvmlUnitGetDeviceCount(unit): - c_count = c_uint(0) - # query the unit to determine device count - fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), None) - if (ret == NVML_ERROR_INSUFFICIENT_SIZE): - ret = NVML_SUCCESS - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetDevices(unit): - c_count = c_uint(nvmlUnitGetDeviceCount(unit)) - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return c_devices - -## Device get functions -def nvmlDeviceGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount_v2") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetHandleByIndex(index): - c_index = c_uint(index) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex_v2") - ret = fn(c_index, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleBySerial(serial): - c_serial = c_char_p(serial) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") - ret = fn(c_serial, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByUUID(uuid): - c_uuid = c_char_p(uuid) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") - ret = fn(c_uuid, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByPciBusId(pciBusId): - c_busId = c_char_p(pciBusId) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId_v2") - ret = fn(c_busId, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetName(handle): - c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") - ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlDeviceGetBoardId(handle): - c_id = c_uint(); - fn = _nvmlGetFunctionPointer("nvmlDeviceGetBoardId") - ret = fn(handle, byref(c_id)) - _nvmlCheckReturn(ret) - return c_id.value - -def nvmlDeviceGetMultiGpuBoard(handle): - c_multiGpu = c_uint(); - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMultiGpuBoard") - ret = fn(handle, byref(c_multiGpu)) - _nvmlCheckReturn(ret) - return c_multiGpu.value - -def nvmlDeviceGetBrand(handle): - c_type = _nvmlBrandType_t() - fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetBrand") - ret = fn(handle, byref(c_type)) - _nvmlCheckReturn(ret) - return c_type.value - -def nvmlDeviceGetSerial(handle): - c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") - ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_serial.value - -def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): - affinity_array = c_ulonglong * cpuSetSize - c_affinity = affinity_array() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCpuAffinity") - ret = fn(handle, cpuSetSize, byref(c_affinity)) - _nvmlCheckReturn(ret) - return c_affinity - -def nvmlDeviceSetCpuAffinity(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetCpuAffinity") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearCpuAffinity(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearCpuAffinity") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetMinorNumber(handle): - c_minor_number = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinorNumber") - ret = fn(handle, byref(c_minor_number)) - _nvmlCheckReturn(ret) - return c_minor_number.value - -def nvmlDeviceGetUUID(handle): - c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") - ret = fn(handle, c_uuid, c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_uuid.value - -def nvmlDeviceGetInforomVersion(handle, infoRomObject): - c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") - ret = fn(handle, _nvmlInforomObject_t(infoRomObject), - c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 4.304 -def nvmlDeviceGetInforomImageVersion(handle): - c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomImageVersion") - ret = fn(handle, c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 4.304 -def nvmlDeviceGetInforomConfigurationChecksum(handle): - c_checksum = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomConfigurationChecksum") - ret = fn(handle, byref(c_checksum)) - _nvmlCheckReturn(ret) - return c_checksum.value - -# Added in 4.304 -def nvmlDeviceValidateInforom(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceValidateInforom") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetDisplayMode(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetDisplayActive(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayActive") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - - -def nvmlDeviceGetPersistenceMode(handle): - c_state = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") - ret = fn(handle, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state.value - -def nvmlDeviceGetPciInfo(handle): - c_info = nvmlPciInfo_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") - ret = fn(handle, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlDeviceGetClockInfo(handle, type): - c_clock = c_uint() - fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 2.285 -def nvmlDeviceGetMaxClockInfo(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 4.304 -def nvmlDeviceGetApplicationsClock(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetApplicationsClock") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 5.319 -def nvmlDeviceGetDefaultApplicationsClock(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDefaultApplicationsClock") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 4.304 -def nvmlDeviceGetSupportedMemoryClocks(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedMemoryClocks") - ret = fn(handle, byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no clocks - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - clocks_array = c_uint * c_count.value - c_clocks = clocks_array() - - # make the call again - ret = fn(handle, byref(c_count), c_clocks) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - procs.append(c_clocks[i]) - - return procs - else: - # error case - raise NVMLError(ret) - -# Added in 4.304 -def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedGraphicsClocks") - ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no clocks - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - clocks_array = c_uint * c_count.value - c_clocks = clocks_array() - - # make the call again - ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), c_clocks) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - procs.append(c_clocks[i]) - - return procs - else: - # error case - raise NVMLError(ret) - -def nvmlDeviceGetFanSpeed(handle): - c_speed = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") - ret = fn(handle, byref(c_speed)) - _nvmlCheckReturn(ret) - return c_speed.value - -def nvmlDeviceGetTemperature(handle, sensor): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") - ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -def nvmlDeviceGetTemperatureThreshold(handle, threshold): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperatureThreshold") - ret = fn(handle, _nvmlTemperatureThresholds_t(threshold), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -# DEPRECATED use nvmlDeviceGetPerformanceState -def nvmlDeviceGetPowerState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def nvmlDeviceGetPerformanceState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def 
nvmlDeviceGetPowerManagementMode(handle): - c_pcapMode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") - ret = fn(handle, byref(c_pcapMode)) - _nvmlCheckReturn(ret) - return c_pcapMode.value - -def nvmlDeviceGetPowerManagementLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - -# Added in 4.304 -def nvmlDeviceGetPowerManagementLimitConstraints(handle): - c_minLimit = c_uint() - c_maxLimit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimitConstraints") - ret = fn(handle, byref(c_minLimit), byref(c_maxLimit)) - _nvmlCheckReturn(ret) - return [c_minLimit.value, c_maxLimit.value] - -# Added in 4.304 -def nvmlDeviceGetPowerManagementDefaultLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementDefaultLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - - -# Added in 331 -def nvmlDeviceGetEnforcedPowerLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEnforcedPowerLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - -def nvmlDeviceGetPowerUsage(handle): - c_watts = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") - ret = fn(handle, byref(c_watts)) - _nvmlCheckReturn(ret) - return c_watts.value - -# Added in 4.304 -def nvmlDeviceGetGpuOperationMode(handle): - c_currState = _nvmlGpuOperationMode_t() - c_pendingState = _nvmlGpuOperationMode_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetGpuOperationMode") - ret = fn(handle, byref(c_currState), byref(c_pendingState)) - _nvmlCheckReturn(ret) - return [c_currState.value, c_pendingState.value] - -# Added in 4.304 -def nvmlDeviceGetCurrentGpuOperationMode(handle): - return nvmlDeviceGetGpuOperationMode(handle)[0] - -# Added in 4.304 -def nvmlDeviceGetPendingGpuOperationMode(handle): - return nvmlDeviceGetGpuOperationMode(handle)[1] - -def nvmlDeviceGetMemoryInfo(handle): - c_memory = c_nvmlMemory_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") - ret = fn(handle, byref(c_memory)) - _nvmlCheckReturn(ret) - return c_memory - -def nvmlDeviceGetBAR1MemoryInfo(handle): - c_bar1_memory = c_nvmlBAR1Memory_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetBAR1MemoryInfo") - ret = fn(handle, byref(c_bar1_memory)) - _nvmlCheckReturn(ret) - return c_bar1_memory - -def nvmlDeviceGetComputeMode(handle): - c_mode = _nvmlComputeMode_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetEccMode(handle): - c_currState = _nvmlEnableState_t() - c_pendingState = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") - ret = fn(handle, byref(c_currState), byref(c_pendingState)) - _nvmlCheckReturn(ret) - return [c_currState.value, c_pendingState.value] - -# added to API -def nvmlDeviceGetCurrentEccMode(handle): - return nvmlDeviceGetEccMode(handle)[0] - -# added to API -def nvmlDeviceGetPendingEccMode(handle): - return nvmlDeviceGetEccMode(handle)[1] - -def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): - c_count = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") - ret = fn(handle, _nvmlMemoryErrorType_t(errorType), - _nvmlEccCounterType_t(counterType), byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -# This is 
deprecated, instead use nvmlDeviceGetMemoryErrorCounter -def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): - c_counts = c_nvmlEccErrorCounts_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") - ret = fn(handle, _nvmlMemoryErrorType_t(errorType), - _nvmlEccCounterType_t(counterType), byref(c_counts)) - _nvmlCheckReturn(ret) - return c_counts - -# Added in 4.304 -def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, locationType): - c_count = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryErrorCounter") - ret = fn(handle, - _nvmlMemoryErrorType_t(errorType), - _nvmlEccCounterType_t(counterType), - _nvmlMemoryLocation_t(locationType), - byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetUtilizationRates(handle): - c_util = c_nvmlUtilization_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") - ret = fn(handle, byref(c_util)) - _nvmlCheckReturn(ret) - return c_util - -def nvmlDeviceGetEncoderUtilization(handle): - c_util = c_uint() - c_samplingPeriod = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEncoderUtilization") - ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) - _nvmlCheckReturn(ret) - return [c_util.value, c_samplingPeriod.value] - -def nvmlDeviceGetDecoderUtilization(handle): - c_util = c_uint() - c_samplingPeriod = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDecoderUtilization") - ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) - _nvmlCheckReturn(ret) - return [c_util.value, c_samplingPeriod.value] - -def nvmlDeviceGetPcieReplayCounter(handle): - c_replay = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieReplayCounter") - ret = fn(handle, byref(c_replay)) - _nvmlCheckReturn(ret) - return c_replay.value - -def nvmlDeviceGetDriverModel(handle): - c_currModel = _nvmlDriverModel_t() - c_pendingModel = _nvmlDriverModel_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") - ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) - _nvmlCheckReturn(ret) - return [c_currModel.value, c_pendingModel.value] - -# added to API -def nvmlDeviceGetCurrentDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[0] - -# added to API -def nvmlDeviceGetPendingDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[1] - -# Added in 2.285 -def nvmlDeviceGetVbiosVersion(handle): - c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") - ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlDeviceGetComputeRunningProcesses(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") - ret = fn(handle, byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no running processes - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - # oversize the array incase more processes are created - c_count.value = c_count.value * 2 + 5 - proc_array = c_nvmlProcessInfo_t * c_count.value - c_procs = proc_array() - - # make the call again - ret = fn(handle, byref(c_count), c_procs) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - # use an alternative struct for this object - obj = nvmlStructToFriendlyObject(c_procs[i]) - if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see 
comment above - obj.usedGpuMemory = None - procs.append(obj) - - return procs - else: - # error case - raise NVMLError(ret) - -def nvmlDeviceGetGraphicsRunningProcesses(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses") - ret = fn(handle, byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no running processes - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - # oversize the array incase more processes are created - c_count.value = c_count.value * 2 + 5 - proc_array = c_nvmlProcessInfo_t * c_count.value - c_procs = proc_array() - - # make the call again - ret = fn(handle, byref(c_count), c_procs) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - # use an alternative struct for this object - obj = nvmlStructToFriendlyObject(c_procs[i]) - if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see comment above - obj.usedGpuMemory = None - procs.append(obj) - - return procs - else: - # error case - raise NVMLError(ret) - -def nvmlDeviceGetAutoBoostedClocksEnabled(handle): - c_isEnabled = _nvmlEnableState_t() - c_defaultIsEnabled = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAutoBoostedClocksEnabled") - ret = fn(handle, byref(c_isEnabled), byref(c_defaultIsEnabled)) - _nvmlCheckReturn(ret) - return [c_isEnabled.value, c_defaultIsEnabled.value] - #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks - -## Set functions -def nvmlUnitSetLedState(unit, color): - fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") - ret = fn(unit, _nvmlLedColor_t(color)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetPersistenceMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetComputeMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") - ret = fn(handle, _nvmlComputeMode_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetEccMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearEccErrorCounts(handle, counterType): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") - ret = fn(handle, _nvmlEccCounterType_t(counterType)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetDriverModel(handle, model): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") - ret = fn(handle, _nvmlDriverModel_t(model)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetAutoBoostedClocksEnabled") - ret = fn(handle, _nvmlEnableState_t(enabled)) - _nvmlCheckReturn(ret) - return None - #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks - -def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetDefaultAutoBoostedClocksEnabled") - ret = fn(handle, _nvmlEnableState_t(enabled), c_uint(flags)) - _nvmlCheckReturn(ret) - return None - #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks - -# Added in 4.304 -def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsClockMHz): - fn = 
_nvmlGetFunctionPointer("nvmlDeviceSetApplicationsClocks") - ret = fn(handle, c_uint(maxMemClockMHz), c_uint(maxGraphicsClockMHz)) - _nvmlCheckReturn(ret) - return None - -# Added in 4.304 -def nvmlDeviceResetApplicationsClocks(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceResetApplicationsClocks") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -# Added in 4.304 -def nvmlDeviceSetPowerManagementLimit(handle, limit): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetPowerManagementLimit") - ret = fn(handle, c_uint(limit)) - _nvmlCheckReturn(ret) - return None - -# Added in 4.304 -def nvmlDeviceSetGpuOperationMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetGpuOperationMode") - ret = fn(handle, _nvmlGpuOperationMode_t(mode)) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlEventSetCreate(): - fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") - eventSet = c_nvmlEventSet_t() - ret = fn(byref(eventSet)) - _nvmlCheckReturn(ret) - return eventSet - -# Added in 2.285 -def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): - fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") - ret = fn(handle, c_ulonglong(eventTypes), eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlDeviceGetSupportedEventTypes(handle): - c_eventTypes = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") - ret = fn(handle, byref(c_eventTypes)) - _nvmlCheckReturn(ret) - return c_eventTypes.value - -# Added in 2.285 -# raises NVML_ERROR_TIMEOUT exception on timeout -def nvmlEventSetWait(eventSet, timeoutms): - fn = _nvmlGetFunctionPointer("nvmlEventSetWait") - data = c_nvmlEventData_t() - ret = fn(eventSet, byref(data), c_uint(timeoutms)) - _nvmlCheckReturn(ret) - return data - -# Added in 2.285 -def nvmlEventSetFree(eventSet): - fn = _nvmlGetFunctionPointer("nvmlEventSetFree") - ret = fn(eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 3.295 -def nvmlDeviceOnSameBoard(handle1, handle2): - fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") - onSameBoard = c_int() - ret = fn(handle1, handle2, byref(onSameBoard)) - _nvmlCheckReturn(ret) - return (onSameBoard.value != 0) - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - -# Added in 4.304 -def nvmlDeviceGetSupportedClocksThrottleReasons(handle): - c_reasons= c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons") - ret = fn(handle, byref(c_reasons)) - _nvmlCheckReturn(ret) - return c_reasons.value - -# Added in 4.304 -def nvmlDeviceGetCurrentClocksThrottleReasons(handle): - c_reasons= c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons") - 
ret = fn(handle, byref(c_reasons)) - _nvmlCheckReturn(ret) - return c_reasons.value - -# Added in 5.319 -def nvmlDeviceGetIndex(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetIndex") - c_index = c_uint() - ret = fn(handle, byref(c_index)) - _nvmlCheckReturn(ret) - return c_index.value - -# Added in 5.319 -def nvmlDeviceGetAccountingMode(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceSetAccountingMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetAccountingMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearAccountingPids(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearAccountingPids") - ret = fn(handle) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetAccountingStats(handle, pid): - stats = c_nvmlAccountingStats_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingStats") - ret = fn(handle, c_uint(pid), byref(stats)) - _nvmlCheckReturn(ret) - if (stats.maxMemoryUsage == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see comment above - stats.maxMemoryUsage = None - return stats - -def nvmlDeviceGetAccountingPids(handle): - count = c_uint(nvmlDeviceGetAccountingBufferSize(handle)) - pids = (c_uint * count.value)() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingPids") - ret = fn(handle, byref(count), pids) - _nvmlCheckReturn(ret) - return map(int, pids[0:count.value]) - -def nvmlDeviceGetAccountingBufferSize(handle): - bufferSize = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingBufferSize") - ret = fn(handle, byref(bufferSize)) - _nvmlCheckReturn(ret) - return int(bufferSize.value) - -def nvmlDeviceGetRetiredPages(device, sourceFilter): - c_source = _nvmlPageRetirementCause_t(sourceFilter) - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages") - - # First call will get the size - ret = fn(device, c_source, byref(c_count), None) - - # this should only fail with insufficient size - if ((ret != NVML_SUCCESS) and - (ret != NVML_ERROR_INSUFFICIENT_SIZE)): - raise NVMLError(ret) - - # call again with a buffer - # oversize the array for the rare cases where additional pages - # are retired between NVML calls - c_count.value = c_count.value * 2 + 5 - page_array = c_ulonglong * c_count.value - c_pages = page_array() - ret = fn(device, c_source, byref(c_count), c_pages) - _nvmlCheckReturn(ret) - return map(int, c_pages[0:c_count.value]) - -def nvmlDeviceGetRetiredPagesPendingStatus(device): - c_pending = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPagesPendingStatus") - ret = fn(device, byref(c_pending)) - _nvmlCheckReturn(ret) - return int(c_pending.value) - -def nvmlDeviceGetAPIRestriction(device, apiType): - c_permission = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetAPIRestriction") - ret = fn(device, _nvmlRestrictedAPI_t(apiType), byref(c_permission)) - _nvmlCheckReturn(ret) - return int(c_permission.value) - -def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetAPIRestriction") - ret = fn(handle, _nvmlRestrictedAPI_t(apiType), _nvmlEnableState_t(isRestricted)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceGetBridgeChipInfo(handle): - bridgeHierarchy = c_nvmlBridgeChipHierarchy_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetBridgeChipInfo") - 
ret = fn(handle, byref(bridgeHierarchy)) - _nvmlCheckReturn(ret) - return bridgeHierarchy - -def nvmlDeviceGetSamples(device, sampling_type, timeStamp): - c_sampling_type = _nvmlSamplingType_t(sampling_type) - c_time_stamp = c_ulonglong(timeStamp) - c_sample_count = c_uint(0) - c_sample_value_type = _nvmlValueType_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSamples") - - ## First Call gets the size - ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), None) - - # Stop if this fails - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - - sampleArray = c_sample_count.value * c_nvmlSample_t - c_samples = sampleArray() - ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), c_samples) - _nvmlCheckReturn(ret) - return (c_sample_value_type.value, c_samples[0:c_sample_count.value]) - -def nvmlDeviceGetViolationStatus(device, perfPolicyType): - c_perfPolicy_type = _nvmlPerfPolicyType_t(perfPolicyType) - c_violTime = c_nvmlViolationTime_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetViolationStatus") - - ## Invoke the method to get violation time - ret = fn(device, c_perfPolicy_type, byref(c_violTime)) - _nvmlCheckReturn(ret) - return c_violTime - -def nvmlDeviceGetPcieThroughput(device, counter): - c_util = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieThroughput") - ret = fn(device, _nvmlPcieUtilCounter_t(counter), byref(c_util)) - _nvmlCheckReturn(ret) - return c_util.value - -def nvmlSystemGetTopologyGpuSet(cpuNumber): - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlSystemGetTopologyGpuSet") - - # First call will get the size - ret = fn(cpuNumber, byref(c_count), None) - - if ret != NVML_SUCCESS: - raise NVMLError(ret) - print(c_count.value) - # call again with a buffer - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - ret = fn(cpuNumber, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return map(None, c_devices[0:c_count.value]) - -def nvmlDeviceGetTopologyNearestGpus(device, level): - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyNearestGpus") - - # First call will get the size - ret = fn(device, level, byref(c_count), None) - - if ret != NVML_SUCCESS: - raise NVMLError(ret) - - # call again with a buffer - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - ret = fn(device, level, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return map(None, c_devices[0:c_count.value]) - -def nvmlDeviceGetTopologyCommonAncestor(device1, device2): - c_level = _nvmlGpuTopologyLevel_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyCommonAncestor") - ret = fn(device1, device2, byref(c_level)) - _nvmlCheckReturn(ret) - return c_level.value - -#DeepFaceLab additions -def nvmlDeviceGetCudaComputeCapability(device): - c_major = c_int() - c_minor = c_int() - - try: - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability") - except: - return 9, 9 - - # get the count - ret = fn(device, byref(c_major), byref(c_minor)) - - # this should only fail with insufficient size - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - +##### +# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the NVIDIA Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +##### + +## +# Python bindings for the NVML library +## +from ctypes import * +from ctypes.util import find_library +import sys +import os +import threading +import string + +## C Type mappings ## +## Enums +_nvmlEnableState_t = c_uint +NVML_FEATURE_DISABLED = 0 +NVML_FEATURE_ENABLED = 1 + +_nvmlBrandType_t = c_uint +NVML_BRAND_UNKNOWN = 0 +NVML_BRAND_QUADRO = 1 +NVML_BRAND_TESLA = 2 +NVML_BRAND_NVS = 3 +NVML_BRAND_GRID = 4 +NVML_BRAND_GEFORCE = 5 +NVML_BRAND_COUNT = 6 + +_nvmlTemperatureThresholds_t = c_uint +NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0 +NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1 +NVML_TEMPERATURE_THRESHOLD_COUNT = 1 + +_nvmlTemperatureSensors_t = c_uint +NVML_TEMPERATURE_GPU = 0 +NVML_TEMPERATURE_COUNT = 1 + +_nvmlComputeMode_t = c_uint +NVML_COMPUTEMODE_DEFAULT = 0 +NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 +NVML_COMPUTEMODE_PROHIBITED = 2 +NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 +NVML_COMPUTEMODE_COUNT = 4 + +_nvmlMemoryLocation_t = c_uint +NVML_MEMORY_LOCATION_L1_CACHE = 0 +NVML_MEMORY_LOCATION_L2_CACHE = 1 +NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2 +NVML_MEMORY_LOCATION_REGISTER_FILE = 3 +NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4 +NVML_MEMORY_LOCATION_COUNT = 5 + +# These are deprecated, instead use _nvmlMemoryErrorType_t +_nvmlEccBitType_t = c_uint +NVML_SINGLE_BIT_ECC = 0 +NVML_DOUBLE_BIT_ECC = 1 +NVML_ECC_ERROR_TYPE_COUNT = 2 + +_nvmlEccCounterType_t = c_uint +NVML_VOLATILE_ECC = 0 +NVML_AGGREGATE_ECC = 1 +NVML_ECC_COUNTER_TYPE_COUNT = 2 + +_nvmlMemoryErrorType_t = c_uint +NVML_MEMORY_ERROR_TYPE_CORRECTED = 0 +NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1 +NVML_MEMORY_ERROR_TYPE_COUNT = 2 + +_nvmlClockType_t = c_uint +NVML_CLOCK_GRAPHICS = 0 +NVML_CLOCK_SM = 1 +NVML_CLOCK_MEM = 2 +NVML_CLOCK_COUNT = 3 + +_nvmlDriverModel_t = c_uint +NVML_DRIVER_WDDM = 0 +NVML_DRIVER_WDM = 1 + +_nvmlPstates_t = c_uint +NVML_PSTATE_0 = 0 +NVML_PSTATE_1 = 1 +NVML_PSTATE_2 = 2 +NVML_PSTATE_3 = 3 +NVML_PSTATE_4 = 4 +NVML_PSTATE_5 = 5 +NVML_PSTATE_6 = 6 +NVML_PSTATE_7 = 7 +NVML_PSTATE_8 = 8 +NVML_PSTATE_9 = 9 +NVML_PSTATE_10 = 10 +NVML_PSTATE_11 = 11 +NVML_PSTATE_12 = 12 +NVML_PSTATE_13 = 13 +NVML_PSTATE_14 = 14 +NVML_PSTATE_15 = 15 +NVML_PSTATE_UNKNOWN = 32 + +_nvmlInforomObject_t = c_uint +NVML_INFOROM_OEM = 0 +NVML_INFOROM_ECC = 1 +NVML_INFOROM_POWER = 2 +NVML_INFOROM_COUNT = 3 + +_nvmlReturn_t = c_uint +NVML_SUCCESS = 0 
+NVML_ERROR_UNINITIALIZED = 1 +NVML_ERROR_INVALID_ARGUMENT = 2 +NVML_ERROR_NOT_SUPPORTED = 3 +NVML_ERROR_NO_PERMISSION = 4 +NVML_ERROR_ALREADY_INITIALIZED = 5 +NVML_ERROR_NOT_FOUND = 6 +NVML_ERROR_INSUFFICIENT_SIZE = 7 +NVML_ERROR_INSUFFICIENT_POWER = 8 +NVML_ERROR_DRIVER_NOT_LOADED = 9 +NVML_ERROR_TIMEOUT = 10 +NVML_ERROR_IRQ_ISSUE = 11 +NVML_ERROR_LIBRARY_NOT_FOUND = 12 +NVML_ERROR_FUNCTION_NOT_FOUND = 13 +NVML_ERROR_CORRUPTED_INFOROM = 14 +NVML_ERROR_GPU_IS_LOST = 15 +NVML_ERROR_RESET_REQUIRED = 16 +NVML_ERROR_OPERATING_SYSTEM = 17 +NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18 +NVML_ERROR_UNKNOWN = 999 + +_nvmlFanState_t = c_uint +NVML_FAN_NORMAL = 0 +NVML_FAN_FAILED = 1 + +_nvmlLedColor_t = c_uint +NVML_LED_COLOR_GREEN = 0 +NVML_LED_COLOR_AMBER = 1 + +_nvmlGpuOperationMode_t = c_uint +NVML_GOM_ALL_ON = 0 +NVML_GOM_COMPUTE = 1 +NVML_GOM_LOW_DP = 2 + +_nvmlPageRetirementCause_t = c_uint +NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 0 +NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 1 +NVML_PAGE_RETIREMENT_CAUSE_COUNT = 2 + +_nvmlRestrictedAPI_t = c_uint +NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0 +NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1 +NVML_RESTRICTED_API_COUNT = 2 + +_nvmlBridgeChipType_t = c_uint +NVML_BRIDGE_CHIP_PLX = 0 +NVML_BRIDGE_CHIP_BRO4 = 1 +NVML_MAX_PHYSICAL_BRIDGE = 128 + +_nvmlValueType_t = c_uint +NVML_VALUE_TYPE_DOUBLE = 0 +NVML_VALUE_TYPE_UNSIGNED_INT = 1 +NVML_VALUE_TYPE_UNSIGNED_LONG = 2 +NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3 +NVML_VALUE_TYPE_COUNT = 4 + +_nvmlPerfPolicyType_t = c_uint +NVML_PERF_POLICY_POWER = 0 +NVML_PERF_POLICY_THERMAL = 1 +NVML_PERF_POLICY_COUNT = 2 + +_nvmlSamplingType_t = c_uint +NVML_TOTAL_POWER_SAMPLES = 0 +NVML_GPU_UTILIZATION_SAMPLES = 1 +NVML_MEMORY_UTILIZATION_SAMPLES = 2 +NVML_ENC_UTILIZATION_SAMPLES = 3 +NVML_DEC_UTILIZATION_SAMPLES = 4 +NVML_PROCESSOR_CLK_SAMPLES = 5 +NVML_MEMORY_CLK_SAMPLES = 6 +NVML_SAMPLINGTYPE_COUNT = 7 + +_nvmlPcieUtilCounter_t = c_uint +NVML_PCIE_UTIL_TX_BYTES = 0 +NVML_PCIE_UTIL_RX_BYTES = 1 +NVML_PCIE_UTIL_COUNT = 2 + +_nvmlGpuTopologyLevel_t = c_uint +NVML_TOPOLOGY_INTERNAL = 0 +NVML_TOPOLOGY_SINGLE = 10 +NVML_TOPOLOGY_MULTIPLE = 20 +NVML_TOPOLOGY_HOSTBRIDGE = 30 +NVML_TOPOLOGY_CPU = 40 +NVML_TOPOLOGY_SYSTEM = 50 + +# C preprocessor defined values +nvmlFlagDefault = 0 +nvmlFlagForce = 1 + +# buffer size +NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 +NVML_DEVICE_UUID_BUFFER_SIZE = 80 +NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 +NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 +NVML_DEVICE_NAME_BUFFER_SIZE = 64 +NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 +NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 +NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = 16 + +NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) +NVML_VALUE_NOT_AVAILABLE_uint = c_uint(-1) + +## Lib loading ## +nvmlLib = None +libLoadLock = threading.Lock() +_nvmlLib_refcount = 0 # Incremented on each nvmlInit and decremented on nvmlShutdown + +## Error Checking ## +class NVMLError(Exception): + _valClassMapping = dict() + # List of currently known error codes + _errcode_to_string = { + NVML_ERROR_UNINITIALIZED: "Uninitialized", + NVML_ERROR_INVALID_ARGUMENT: "Invalid Argument", + NVML_ERROR_NOT_SUPPORTED: "Not Supported", + NVML_ERROR_NO_PERMISSION: "Insufficient Permissions", + NVML_ERROR_ALREADY_INITIALIZED: "Already Initialized", + NVML_ERROR_NOT_FOUND: "Not Found", + NVML_ERROR_INSUFFICIENT_SIZE: "Insufficient Size", + NVML_ERROR_INSUFFICIENT_POWER: "Insufficient External Power", + NVML_ERROR_DRIVER_NOT_LOADED: "Driver Not Loaded", + 
NVML_ERROR_TIMEOUT: "Timeout", + NVML_ERROR_IRQ_ISSUE: "Interrupt Request Issue", + NVML_ERROR_LIBRARY_NOT_FOUND: "NVML Shared Library Not Found", + NVML_ERROR_FUNCTION_NOT_FOUND: "Function Not Found", + NVML_ERROR_CORRUPTED_INFOROM: "Corrupted infoROM", + NVML_ERROR_GPU_IS_LOST: "GPU is lost", + NVML_ERROR_RESET_REQUIRED: "GPU requires restart", + NVML_ERROR_OPERATING_SYSTEM: "The operating system has blocked the request.", + NVML_ERROR_LIB_RM_VERSION_MISMATCH: "RM has detected an NVML/RM version mismatch.", + NVML_ERROR_UNKNOWN: "Unknown Error", + } + def __new__(typ, value): + ''' + Maps value to a proper subclass of NVMLError. + See _extractNVMLErrorsAsClasses function for more details + ''' + if typ == NVMLError: + typ = NVMLError._valClassMapping.get(value, typ) + obj = Exception.__new__(typ) + obj.value = value + return obj + def __str__(self): + try: + if self.value not in NVMLError._errcode_to_string: + NVMLError._errcode_to_string[self.value] = str(nvmlErrorString(self.value)) + return NVMLError._errcode_to_string[self.value] + except NVMLError_Uninitialized: + return "NVML Error with code %d" % self.value + def __eq__(self, other): + return self.value == other.value + +def _extractNVMLErrorsAsClasses(): + ''' + Generates a hierarchy of classes on top of NVMLError class. + + Each NVML Error gets a new NVMLError subclass. This way try/except blocks can filter appropriate + exceptions more easily. + + NVMLError is a parent class. Each NVML_ERROR_* gets its own subclass. + e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized + ''' + this_module = sys.modules[__name__] + nvmlErrorsNames = filter(lambda x: x.startswith("NVML_ERROR_"), dir(this_module)) + for err_name in nvmlErrorsNames: + # e.g. Turn NVML_ERROR_ALREADY_INITIALIZED into NVMLError_AlreadyInitialized + class_name = "NVMLError_" + string.capwords(err_name.replace("NVML_ERROR_", ""), "_").replace("_", "") + err_val = getattr(this_module, err_name) + def gen_new(val): + def new(typ): + obj = NVMLError.__new__(typ, val) + return obj + return new + new_error_class = type(class_name, (NVMLError,), {'__new__': gen_new(err_val)}) + new_error_class.__module__ = __name__ + setattr(this_module, class_name, new_error_class) + NVMLError._valClassMapping[err_val] = new_error_class +_extractNVMLErrorsAsClasses() + +def _nvmlCheckReturn(ret): + if (ret != NVML_SUCCESS): + raise NVMLError(ret) + return ret + +## Function access ## +_nvmlGetFunctionPointer_cache = dict() # function pointers are cached to prevent unnecessary libLoadLock locking +def _nvmlGetFunctionPointer(name): + global nvmlLib + + if name in _nvmlGetFunctionPointer_cache: + return _nvmlGetFunctionPointer_cache[name] + + libLoadLock.acquire() + try: + # ensure library was loaded + if (nvmlLib == None): + raise NVMLError(NVML_ERROR_UNINITIALIZED) + try: + _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name) + return _nvmlGetFunctionPointer_cache[name] + except AttributeError: + raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +## Alternative object +# Allows the object to be printed +# Allows mismatched types to be assigned +# - like None when the Structure variant requires c_uint +class nvmlFriendlyObject(object): + def __init__(self, dictionary): + for x in dictionary: + setattr(self, x, dictionary[x]) + def __str__(self): + return self.__dict__.__str__() + +def nvmlStructToFriendlyObject(struct): + d = {} + for x in struct._fields_: + key = x[0] + value = 
getattr(struct, key) + d[key] = value + obj = nvmlFriendlyObject(d) + return obj + +# pack the object so it can be passed to the NVML library +def nvmlFriendlyObjectToStruct(obj, model): + for x in model._fields_: + key = x[0] + value = obj.__dict__[key] + setattr(model, key, value) + return model + +## Unit structures +class struct_c_nvmlUnit_t(Structure): + pass # opaque handle +c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) + +class _PrintableStructure(Structure): + """ + Abstract class that produces nicer __str__ output than ctypes.Structure. + e.g. instead of: + >>> print str(obj) + <class_name object at 0x7fdf82fef9e0> + this class will print + class_name(field_name: formatted_value, field_name: formatted_value) + + _fmt_ dictionary of <str _field_ name> -> <str format> + e.g. class that has _field_ 'hex_value', c_uint could be formatted with + _fmt_ = {"hex_value" : "%08X"} + to produce nicer output. + Default formatting string for all fields can be set with key "" like: + _fmt_ = {"" : "%d MHz"} # e.g. all values are numbers in MHz. + If not set it's assumed to be just "%s" + + Exact format of returned str from this class is subject to change in the future. + """ + _fmt_ = {} + def __str__(self): + result = [] + for x in self._fields_: + key = x[0] + value = getattr(self, key) + fmt = "%s" + if key in self._fmt_: + fmt = self._fmt_[key] + elif "" in self._fmt_: + fmt = self._fmt_[""] + result.append(("%s: " + fmt) % (key, value)) + return self.__class__.__name__ + "(" + string.join(result, ", ") + ")" + +class c_nvmlUnitInfo_t(_PrintableStructure): + _fields_ = [ + ('name', c_char * 96), + ('id', c_char * 96), + ('serial', c_char * 96), + ('firmwareVersion', c_char * 96), + ] + +class c_nvmlLedState_t(_PrintableStructure): + _fields_ = [ + ('cause', c_char * 256), + ('color', _nvmlLedColor_t), + ] + +class c_nvmlPSUInfo_t(_PrintableStructure): + _fields_ = [ + ('state', c_char * 256), + ('current', c_uint), + ('voltage', c_uint), + ('power', c_uint), + ] + +class c_nvmlUnitFanInfo_t(_PrintableStructure): + _fields_ = [ + ('speed', c_uint), + ('state', _nvmlFanState_t), + ] + +class c_nvmlUnitFanSpeeds_t(_PrintableStructure): + _fields_ = [ + ('fans', c_nvmlUnitFanInfo_t * 24), + ('count', c_uint) + ] + +## Device structures +class struct_c_nvmlDevice_t(Structure): + pass # opaque handle +c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) + +class nvmlPciInfo_t(_PrintableStructure): + _fields_ = [ + ('busId', c_char * 16), + ('domain', c_uint), + ('bus', c_uint), + ('device', c_uint), + ('pciDeviceId', c_uint), + + # Added in 2.285 + ('pciSubSystemId', c_uint), + ('reserved0', c_uint), + ('reserved1', c_uint), + ('reserved2', c_uint), + ('reserved3', c_uint), + ] + _fmt_ = { + 'domain' : "0x%04X", + 'bus' : "0x%02X", + 'device' : "0x%02X", + 'pciDeviceId' : "0x%08X", + 'pciSubSystemId' : "0x%08X", + } + +class c_nvmlMemory_t(_PrintableStructure): + _fields_ = [ + ('total', c_ulonglong), + ('free', c_ulonglong), + ('used', c_ulonglong), + ] + _fmt_ = {'': "%d B"} + +class c_nvmlBAR1Memory_t(_PrintableStructure): + _fields_ = [ + ('bar1Total', c_ulonglong), + ('bar1Free', c_ulonglong), + ('bar1Used', c_ulonglong), + ] + _fmt_ = {'': "%d B"} + +# On Windows with the WDDM driver, usedGpuMemory is reported as None +# Code that processes this structure should check for None, i.e. 
+# +# if (info.usedGpuMemory == None): +# # TODO handle the error +# pass +# else: +# print("Using %d MiB of memory" % (info.usedGpuMemory / 1024 / 1024)) +# +# See NVML documentation for more information +class c_nvmlProcessInfo_t(_PrintableStructure): + _fields_ = [ + ('pid', c_uint), + ('usedGpuMemory', c_ulonglong), + ] + _fmt_ = {'usedGpuMemory': "%d B"} + +class c_nvmlBridgeChipInfo_t(_PrintableStructure): + _fields_ = [ + ('type', _nvmlBridgeChipType_t), + ('fwVersion', c_uint), + ] + +class c_nvmlBridgeChipHierarchy_t(_PrintableStructure): + _fields_ = [ + ('bridgeCount', c_uint), + ('bridgeChipInfo', c_nvmlBridgeChipInfo_t * 128), + ] + +class c_nvmlEccErrorCounts_t(_PrintableStructure): + _fields_ = [ + ('l1Cache', c_ulonglong), + ('l2Cache', c_ulonglong), + ('deviceMemory', c_ulonglong), + ('registerFile', c_ulonglong), + ] + +class c_nvmlUtilization_t(_PrintableStructure): + _fields_ = [ + ('gpu', c_uint), + ('memory', c_uint), + ] + _fmt_ = {'': "%d %%"} + +# Added in 2.285 +class c_nvmlHwbcEntry_t(_PrintableStructure): + _fields_ = [ + ('hwbcId', c_uint), + ('firmwareVersion', c_char * 32), + ] + +class c_nvmlValue_t(Union): + _fields_ = [ + ('dVal', c_double), + ('uiVal', c_uint), + ('ulVal', c_ulong), + ('ullVal', c_ulonglong), + ] + +class c_nvmlSample_t(_PrintableStructure): + _fields_ = [ + ('timeStamp', c_ulonglong), + ('sampleValue', c_nvmlValue_t), + ] + +class c_nvmlViolationTime_t(_PrintableStructure): + _fields_ = [ + ('referenceTime', c_ulonglong), + ('violationTime', c_ulonglong), + ] + +## Event structures +class struct_c_nvmlEventSet_t(Structure): + pass # opaque handle +c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) + +nvmlEventTypeSingleBitEccError = 0x0000000000000001 +nvmlEventTypeDoubleBitEccError = 0x0000000000000002 +nvmlEventTypePState = 0x0000000000000004 +nvmlEventTypeXidCriticalError = 0x0000000000000008 +nvmlEventTypeClock = 0x0000000000000010 +nvmlEventTypeNone = 0x0000000000000000 +nvmlEventTypeAll = ( + nvmlEventTypeNone | + nvmlEventTypeSingleBitEccError | + nvmlEventTypeDoubleBitEccError | + nvmlEventTypePState | + nvmlEventTypeClock | + nvmlEventTypeXidCriticalError + ) + +## Clock Throttle Reasons defines +nvmlClocksThrottleReasonGpuIdle = 0x0000000000000001 +nvmlClocksThrottleReasonApplicationsClocksSetting = 0x0000000000000002 +nvmlClocksThrottleReasonUserDefinedClocks = nvmlClocksThrottleReasonApplicationsClocksSetting # deprecated, use nvmlClocksThrottleReasonApplicationsClocksSetting +nvmlClocksThrottleReasonSwPowerCap = 0x0000000000000004 +nvmlClocksThrottleReasonHwSlowdown = 0x0000000000000008 +nvmlClocksThrottleReasonUnknown = 0x8000000000000000 +nvmlClocksThrottleReasonNone = 0x0000000000000000 +nvmlClocksThrottleReasonAll = ( + nvmlClocksThrottleReasonNone | + nvmlClocksThrottleReasonGpuIdle | + nvmlClocksThrottleReasonApplicationsClocksSetting | + nvmlClocksThrottleReasonSwPowerCap | + nvmlClocksThrottleReasonHwSlowdown | + nvmlClocksThrottleReasonUnknown + ) + +class c_nvmlEventData_t(_PrintableStructure): + _fields_ = [ + ('device', c_nvmlDevice_t), + ('eventType', c_ulonglong), + ('eventData', c_ulonglong) + ] + _fmt_ = {'eventType': "0x%08X"} + +class c_nvmlAccountingStats_t(_PrintableStructure): + _fields_ = [ + ('gpuUtilization', c_uint), + ('memoryUtilization', c_uint), + ('maxMemoryUsage', c_ulonglong), + ('time', c_ulonglong), + ('startTime', c_ulonglong), + ('isRunning', c_uint), + ('reserved', c_uint * 5) + ] + +## C function wrappers ## +def nvmlInit(): + _LoadNvmlLibrary() + + # + # Initialize the library + # + fn 
= _nvmlGetFunctionPointer("nvmlInit_v2") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + _nvmlLib_refcount += 1 + libLoadLock.release() + return None + +def _LoadNvmlLibrary(): + ''' + Load the library if it isn't loaded already + ''' + global nvmlLib + + if (nvmlLib == None): + # lock to ensure only one caller loads the library + libLoadLock.acquire() + + try: + # ensure the library still isn't loaded + if (nvmlLib == None): + try: + if (sys.platform[:3] == "win"): + searchPaths = [ + os.path.join(os.getenv("ProgramFiles", r"C:\Program Files"), r"NVIDIA Corporation\NVSMI\nvml.dll"), + os.path.join(os.getenv("WinDir", r"C:\Windows"), r"System32\nvml.dll"), + ] + nvmlPath = next((x for x in searchPaths if os.path.isfile(x)), None) + if (nvmlPath == None): + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + else: + # cdecl calling convention + nvmlLib = CDLL(nvmlPath) + else: + # assume linux + nvmlLib = CDLL("libnvidia-ml.so.1") + except OSError as ose: + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + if (nvmlLib == None): + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +def nvmlShutdown(): + # + # Leave the library loaded, but shutdown the interface + # + fn = _nvmlGetFunctionPointer("nvmlShutdown") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + if (0 < _nvmlLib_refcount): + _nvmlLib_refcount -= 1 + libLoadLock.release() + return None + +# Added in 2.285 +def nvmlErrorString(result): + fn = _nvmlGetFunctionPointer("nvmlErrorString") + fn.restype = c_char_p # otherwise return is an int + ret = fn(result) + return ret + +# Added in 2.285 +def nvmlSystemGetNVMLVersion(): + c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetProcessName(pid): + c_name = create_string_buffer(1024) + fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") + ret = fn(c_uint(pid), c_name, c_uint(1024)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlSystemGetDriverVersion(): + c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetHicVersion(): + c_count = c_uint(0) + hics = None + fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") + + # get the count + ret = fn(byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # if there are no hics + if (c_count.value == 0): + return [] + + hic_array = c_nvmlHwbcEntry_t * c_count.value + hics = hic_array() + ret = fn(byref(c_count), hics) + _nvmlCheckReturn(ret) + return hics + +## Unit get functions +def nvmlUnitGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetHandleByIndex(index): + c_index = c_uint(index) + unit = c_nvmlUnit_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") + ret = fn(c_index, byref(unit)) + _nvmlCheckReturn(ret) + 
return unit + +def nvmlUnitGetUnitInfo(unit): + c_info = c_nvmlUnitInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetLedState(unit): + c_state = c_nvmlLedState_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") + ret = fn(unit, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state + +def nvmlUnitGetPsuInfo(unit): + c_info = c_nvmlPSUInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetTemperature(unit, type): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") + ret = fn(unit, c_uint(type), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlUnitGetFanSpeedInfo(unit): + c_speeds = c_nvmlUnitFanSpeeds_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") + ret = fn(unit, byref(c_speeds)) + _nvmlCheckReturn(ret) + return c_speeds + +# added to API +def nvmlUnitGetDeviceCount(unit): + c_count = c_uint(0) + # query the unit to determine device count + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), None) + if (ret == NVML_ERROR_INSUFFICIENT_SIZE): + ret = NVML_SUCCESS + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetDevices(unit): + c_count = c_uint(nvmlUnitGetDeviceCount(unit)) + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return c_devices + +## Device get functions +def nvmlDeviceGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount_v2") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetHandleByIndex(index): + c_index = c_uint(index) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex_v2") + ret = fn(c_index, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleBySerial(serial): + c_serial = c_char_p(serial) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") + ret = fn(c_serial, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByUUID(uuid): + c_uuid = c_char_p(uuid) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") + ret = fn(c_uuid, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByPciBusId(pciBusId): + c_busId = c_char_p(pciBusId) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId_v2") + ret = fn(c_busId, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetName(handle): + c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") + ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlDeviceGetBoardId(handle): + c_id = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBoardId") + ret = fn(handle, byref(c_id)) + _nvmlCheckReturn(ret) + return c_id.value + +def nvmlDeviceGetMultiGpuBoard(handle): + c_multiGpu = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMultiGpuBoard") + ret = fn(handle, byref(c_multiGpu)) + _nvmlCheckReturn(ret) + return c_multiGpu.value + +def nvmlDeviceGetBrand(handle): + c_type = _nvmlBrandType_t() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetBrand") + ret = fn(handle, byref(c_type)) + _nvmlCheckReturn(ret) + return c_type.value + +def nvmlDeviceGetSerial(handle): + c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") + ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_serial.value + +def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): + affinity_array = c_ulonglong * cpuSetSize + c_affinity = affinity_array() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCpuAffinity") + ret = fn(handle, cpuSetSize, byref(c_affinity)) + _nvmlCheckReturn(ret) + return c_affinity + +def nvmlDeviceSetCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetMinorNumber(handle): + c_minor_number = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinorNumber") + ret = fn(handle, byref(c_minor_number)) + _nvmlCheckReturn(ret) + return c_minor_number.value + +def nvmlDeviceGetUUID(handle): + c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") + ret = fn(handle, c_uuid, c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_uuid.value + +def nvmlDeviceGetInforomVersion(handle, infoRomObject): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") + ret = fn(handle, _nvmlInforomObject_t(infoRomObject), + c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomImageVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomImageVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomConfigurationChecksum(handle): + c_checksum = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomConfigurationChecksum") + ret = fn(handle, byref(c_checksum)) + _nvmlCheckReturn(ret) + return c_checksum.value + +# Added in 4.304 +def nvmlDeviceValidateInforom(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceValidateInforom") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetDisplayMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetDisplayActive(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayActive") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + + +def nvmlDeviceGetPersistenceMode(handle): + c_state = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") + ret = fn(handle, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state.value + +def nvmlDeviceGetPciInfo(handle): + c_info = nvmlPciInfo_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") + ret = fn(handle, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlDeviceGetClockInfo(handle, type): + c_clock = c_uint() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 2.285 +def nvmlDeviceGetMaxClockInfo(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 5.319 +def nvmlDeviceGetDefaultApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDefaultApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetSupportedMemoryClocks(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedMemoryClocks") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +# Added in 4.304 +def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedGraphicsClocks") + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetFanSpeed(handle): + c_speed = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") + ret = fn(handle, byref(c_speed)) + _nvmlCheckReturn(ret) + return c_speed.value + +def nvmlDeviceGetTemperature(handle, sensor): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") + ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlDeviceGetTemperatureThreshold(handle, threshold): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperatureThreshold") + ret = fn(handle, _nvmlTemperatureThresholds_t(threshold), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +# DEPRECATED use nvmlDeviceGetPerformanceState +def nvmlDeviceGetPowerState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def nvmlDeviceGetPerformanceState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def 
nvmlDeviceGetPowerManagementMode(handle): + c_pcapMode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") + ret = fn(handle, byref(c_pcapMode)) + _nvmlCheckReturn(ret) + return c_pcapMode.value + +def nvmlDeviceGetPowerManagementLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +# Added in 4.304 +def nvmlDeviceGetPowerManagementLimitConstraints(handle): + c_minLimit = c_uint() + c_maxLimit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimitConstraints") + ret = fn(handle, byref(c_minLimit), byref(c_maxLimit)) + _nvmlCheckReturn(ret) + return [c_minLimit.value, c_maxLimit.value] + +# Added in 4.304 +def nvmlDeviceGetPowerManagementDefaultLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementDefaultLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + + +# Added in 331 +def nvmlDeviceGetEnforcedPowerLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEnforcedPowerLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +def nvmlDeviceGetPowerUsage(handle): + c_watts = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") + ret = fn(handle, byref(c_watts)) + _nvmlCheckReturn(ret) + return c_watts.value + +# Added in 4.304 +def nvmlDeviceGetGpuOperationMode(handle): + c_currState = _nvmlGpuOperationMode_t() + c_pendingState = _nvmlGpuOperationMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGpuOperationMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# Added in 4.304 +def nvmlDeviceGetCurrentGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[0] + +# Added in 4.304 +def nvmlDeviceGetPendingGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[1] + +def nvmlDeviceGetMemoryInfo(handle): + c_memory = c_nvmlMemory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") + ret = fn(handle, byref(c_memory)) + _nvmlCheckReturn(ret) + return c_memory + +def nvmlDeviceGetBAR1MemoryInfo(handle): + c_bar1_memory = c_nvmlBAR1Memory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBAR1MemoryInfo") + ret = fn(handle, byref(c_bar1_memory)) + _nvmlCheckReturn(ret) + return c_bar1_memory + +def nvmlDeviceGetComputeMode(handle): + c_mode = _nvmlComputeMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetEccMode(handle): + c_currState = _nvmlEnableState_t() + c_pendingState = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# added to API +def nvmlDeviceGetCurrentEccMode(handle): + return nvmlDeviceGetEccMode(handle)[0] + +# added to API +def nvmlDeviceGetPendingEccMode(handle): + return nvmlDeviceGetEccMode(handle)[1] + +def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +# This is 
deprecated, instead use nvmlDeviceGetMemoryErrorCounter +def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): + c_counts = c_nvmlEccErrorCounts_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_counts)) + _nvmlCheckReturn(ret) + return c_counts + +# Added in 4.304 +def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, locationType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryErrorCounter") + ret = fn(handle, + _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), + _nvmlMemoryLocation_t(locationType), + byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetUtilizationRates(handle): + c_util = c_nvmlUtilization_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") + ret = fn(handle, byref(c_util)) + _nvmlCheckReturn(ret) + return c_util + +def nvmlDeviceGetEncoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEncoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetDecoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDecoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetPcieReplayCounter(handle): + c_replay = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieReplayCounter") + ret = fn(handle, byref(c_replay)) + _nvmlCheckReturn(ret) + return c_replay.value + +def nvmlDeviceGetDriverModel(handle): + c_currModel = _nvmlDriverModel_t() + c_pendingModel = _nvmlDriverModel_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") + ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) + _nvmlCheckReturn(ret) + return [c_currModel.value, c_pendingModel.value] + +# added to API +def nvmlDeviceGetCurrentDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[0] + +# added to API +def nvmlDeviceGetPendingDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[1] + +# Added in 2.285 +def nvmlDeviceGetVbiosVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlDeviceGetComputeRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array in case more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see 
comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetGraphicsRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array in case more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetAutoBoostedClocksEnabled(handle): + c_isEnabled = _nvmlEnableState_t() + c_defaultIsEnabled = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAutoBoostedClocksEnabled") + ret = fn(handle, byref(c_isEnabled), byref(c_defaultIsEnabled)) + _nvmlCheckReturn(ret) + return [c_isEnabled.value, c_defaultIsEnabled.value] + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +## Set functions +def nvmlUnitSetLedState(unit, color): + fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") + ret = fn(unit, _nvmlLedColor_t(color)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetPersistenceMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetComputeMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") + ret = fn(handle, _nvmlComputeMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetEccMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearEccErrorCounts(handle, counterType): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") + ret = fn(handle, _nvmlEccCounterType_t(counterType)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetDriverModel(handle, model): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") + ret = fn(handle, _nvmlDriverModel_t(model)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDefaultAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled), c_uint(flags)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +# Added in 4.304 +def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsClockMHz): + fn = 
_nvmlGetFunctionPointer("nvmlDeviceSetApplicationsClocks") + ret = fn(handle, c_uint(maxMemClockMHz), c_uint(maxGraphicsClockMHz)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceResetApplicationsClocks(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceResetApplicationsClocks") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetPowerManagementLimit(handle, limit): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPowerManagementLimit") + ret = fn(handle, c_uint(limit)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetGpuOperationMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetGpuOperationMode") + ret = fn(handle, _nvmlGpuOperationMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlEventSetCreate(): + fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") + eventSet = c_nvmlEventSet_t() + ret = fn(byref(eventSet)) + _nvmlCheckReturn(ret) + return eventSet + +# Added in 2.285 +def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): + fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") + ret = fn(handle, c_ulonglong(eventTypes), eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlDeviceGetSupportedEventTypes(handle): + c_eventTypes = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") + ret = fn(handle, byref(c_eventTypes)) + _nvmlCheckReturn(ret) + return c_eventTypes.value + +# Added in 2.285 +# raises NVML_ERROR_TIMEOUT exception on timeout +def nvmlEventSetWait(eventSet, timeoutms): + fn = _nvmlGetFunctionPointer("nvmlEventSetWait") + data = c_nvmlEventData_t() + ret = fn(eventSet, byref(data), c_uint(timeoutms)) + _nvmlCheckReturn(ret) + return data + +# Added in 2.285 +def nvmlEventSetFree(eventSet): + fn = _nvmlGetFunctionPointer("nvmlEventSetFree") + ret = fn(eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 3.295 +def nvmlDeviceOnSameBoard(handle1, handle2): + fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") + onSameBoard = c_int() + ret = fn(handle1, handle2, byref(onSameBoard)) + _nvmlCheckReturn(ret) + return (onSameBoard.value != 0) + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 4.304 +def nvmlDeviceGetSupportedClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons") + ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 4.304 +def nvmlDeviceGetCurrentClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons") + 
+
+# Added in 3.295
+def nvmlDeviceOnSameBoard(handle1, handle2):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard")
+    onSameBoard = c_int()
+    ret = fn(handle1, handle2, byref(onSameBoard))
+    _nvmlCheckReturn(ret)
+    return (onSameBoard.value != 0)
+
+# Added in 3.295
+def nvmlDeviceGetCurrPcieLinkGeneration(handle):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration")
+    gen = c_uint()
+    ret = fn(handle, byref(gen))
+    _nvmlCheckReturn(ret)
+    return gen.value
+
+# Added in 3.295
+def nvmlDeviceGetMaxPcieLinkGeneration(handle):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration")
+    gen = c_uint()
+    ret = fn(handle, byref(gen))
+    _nvmlCheckReturn(ret)
+    return gen.value
+
+# Added in 3.295
+def nvmlDeviceGetCurrPcieLinkWidth(handle):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth")
+    width = c_uint()
+    ret = fn(handle, byref(width))
+    _nvmlCheckReturn(ret)
+    return width.value
+
+# Added in 3.295
+def nvmlDeviceGetMaxPcieLinkWidth(handle):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth")
+    width = c_uint()
+    ret = fn(handle, byref(width))
+    _nvmlCheckReturn(ret)
+    return width.value
+
+# Added in 4.304
+def nvmlDeviceGetSupportedClocksThrottleReasons(handle):
+    c_reasons = c_ulonglong()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons")
+    ret = fn(handle, byref(c_reasons))
+    _nvmlCheckReturn(ret)
+    return c_reasons.value
+
+# Added in 4.304
+def nvmlDeviceGetCurrentClocksThrottleReasons(handle):
+    c_reasons = c_ulonglong()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons")
+    ret = fn(handle, byref(c_reasons))
+    _nvmlCheckReturn(ret)
+    return c_reasons.value
+
+# Added in 5.319
+def nvmlDeviceGetIndex(handle):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetIndex")
+    c_index = c_uint()
+    ret = fn(handle, byref(c_index))
+    _nvmlCheckReturn(ret)
+    return c_index.value
+
+# Added in 5.319
+def nvmlDeviceGetAccountingMode(handle):
+    c_mode = _nvmlEnableState_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingMode")
+    ret = fn(handle, byref(c_mode))
+    _nvmlCheckReturn(ret)
+    return c_mode.value
+
+def nvmlDeviceSetAccountingMode(handle, mode):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceSetAccountingMode")
+    ret = fn(handle, _nvmlEnableState_t(mode))
+    _nvmlCheckReturn(ret)
+    return None
+
+def nvmlDeviceClearAccountingPids(handle):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceClearAccountingPids")
+    ret = fn(handle)
+    _nvmlCheckReturn(ret)
+    return None
+
+def nvmlDeviceGetAccountingStats(handle, pid):
+    stats = c_nvmlAccountingStats_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingStats")
+    ret = fn(handle, c_uint(pid), byref(stats))
+    _nvmlCheckReturn(ret)
+    if (stats.maxMemoryUsage == NVML_VALUE_NOT_AVAILABLE_ulonglong.value):
+        # special case for WDDM on Windows, see comment above
+        stats.maxMemoryUsage = None
+    return stats
+
+def nvmlDeviceGetAccountingPids(handle):
+    count = c_uint(nvmlDeviceGetAccountingBufferSize(handle))
+    pids = (c_uint * count.value)()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingPids")
+    ret = fn(handle, byref(count), pids)
+    _nvmlCheckReturn(ret)
+    # return a list rather than a one-shot iterator on Python 3
+    return list(map(int, pids[0:count.value]))
+
+def nvmlDeviceGetAccountingBufferSize(handle):
+    bufferSize = c_uint()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingBufferSize")
+    ret = fn(handle, byref(bufferSize))
+    _nvmlCheckReturn(ret)
+    return int(bufferSize.value)
+
+def nvmlDeviceGetRetiredPages(device, sourceFilter):
+    c_source = _nvmlPageRetirementCause_t(sourceFilter)
+    c_count = c_uint(0)
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages")
+
+    # First call will get the size
+    ret = fn(device, c_source, byref(c_count), None)
+
+    # this should only fail with insufficient size
+    if ((ret != NVML_SUCCESS) and
+        (ret != NVML_ERROR_INSUFFICIENT_SIZE)):
+        raise NVMLError(ret)
+
+    # call again with a buffer
+    # oversize the array for the rare cases where additional pages
+    # are retired between NVML calls
+    c_count.value = c_count.value * 2 + 5
+    page_array = c_ulonglong * c_count.value
+    c_pages = page_array()
+    ret = fn(device, c_source, byref(c_count), c_pages)
+    _nvmlCheckReturn(ret)
+    # return a list rather than a one-shot iterator on Python 3
+    return list(map(int, c_pages[0:c_count.value]))
+
+def nvmlDeviceGetRetiredPagesPendingStatus(device):
+    c_pending = _nvmlEnableState_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPagesPendingStatus")
+    ret = fn(device, byref(c_pending))
+    _nvmlCheckReturn(ret)
+    return int(c_pending.value)
+
+def nvmlDeviceGetAPIRestriction(device, apiType):
+    c_permission = _nvmlEnableState_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetAPIRestriction")
+    ret = fn(device, _nvmlRestrictedAPI_t(apiType), byref(c_permission))
+    _nvmlCheckReturn(ret)
+    return int(c_permission.value)
+
+def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted):
+    fn = _nvmlGetFunctionPointer("nvmlDeviceSetAPIRestriction")
+    ret = fn(handle, _nvmlRestrictedAPI_t(apiType), _nvmlEnableState_t(isRestricted))
+    _nvmlCheckReturn(ret)
+    return None
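+
+# Illustrative usage sketch (not part of pynvml): per-process accounting with
+# the functions above, assuming an initialized handle; accounting must be
+# enabled (as root) before any stats exist:
+#
+#     nvmlDeviceSetAccountingMode(handle, NVML_FEATURE_ENABLED)
+#     for pid in nvmlDeviceGetAccountingPids(handle):
+#         stats = nvmlDeviceGetAccountingStats(handle, pid)
+#         print(pid, stats.gpuUtilization, stats.maxMemoryUsage)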
+
+def nvmlDeviceGetBridgeChipInfo(handle):
+    bridgeHierarchy = c_nvmlBridgeChipHierarchy_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetBridgeChipInfo")
+    ret = fn(handle, byref(bridgeHierarchy))
+    _nvmlCheckReturn(ret)
+    return bridgeHierarchy
+
+def nvmlDeviceGetSamples(device, sampling_type, timeStamp):
+    c_sampling_type = _nvmlSamplingType_t(sampling_type)
+    c_time_stamp = c_ulonglong(timeStamp)
+    c_sample_count = c_uint(0)
+    c_sample_value_type = _nvmlValueType_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetSamples")
+
+    ## First Call gets the size
+    ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), None)
+
+    # Stop if this fails
+    if (ret != NVML_SUCCESS):
+        raise NVMLError(ret)
+
+    sampleArray = c_sample_count.value * c_nvmlSample_t
+    c_samples = sampleArray()
+    ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), c_samples)
+    _nvmlCheckReturn(ret)
+    return (c_sample_value_type.value, c_samples[0:c_sample_count.value])
+
+def nvmlDeviceGetViolationStatus(device, perfPolicyType):
+    c_perfPolicy_type = _nvmlPerfPolicyType_t(perfPolicyType)
+    c_violTime = c_nvmlViolationTime_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetViolationStatus")
+
+    ## Invoke the method to get violation time
+    ret = fn(device, c_perfPolicy_type, byref(c_violTime))
+    _nvmlCheckReturn(ret)
+    return c_violTime
+
+def nvmlDeviceGetPcieThroughput(device, counter):
+    c_util = c_uint()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieThroughput")
+    ret = fn(device, _nvmlPcieUtilCounter_t(counter), byref(c_util))
+    _nvmlCheckReturn(ret)
+    return c_util.value
+
+def nvmlSystemGetTopologyGpuSet(cpuNumber):
+    c_count = c_uint(0)
+    fn = _nvmlGetFunctionPointer("nvmlSystemGetTopologyGpuSet")
+
+    # First call will get the size
+    ret = fn(cpuNumber, byref(c_count), None)
+
+    if ret != NVML_SUCCESS:
+        raise NVMLError(ret)
+
+    # call again with a buffer
+    device_array = c_nvmlDevice_t * c_count.value
+    c_devices = device_array()
+    ret = fn(cpuNumber, byref(c_count), c_devices)
+    _nvmlCheckReturn(ret)
+    # map(None, ...) is Python 2 only; return a plain list of device handles
+    return list(c_devices[0:c_count.value])
+
+def nvmlDeviceGetTopologyNearestGpus(device, level):
+    c_count = c_uint(0)
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyNearestGpus")
+
+    # First call will get the size
+    ret = fn(device, level, byref(c_count), None)
+
+    if ret != NVML_SUCCESS:
+        raise NVMLError(ret)
+
+    # call again with a buffer
+    device_array = c_nvmlDevice_t * c_count.value
+    c_devices = device_array()
+    ret = fn(device, level, byref(c_count), c_devices)
+    _nvmlCheckReturn(ret)
+    # map(None, ...) is Python 2 only; return a plain list of device handles
+    return list(c_devices[0:c_count.value])
+
+def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
+    c_level = _nvmlGpuTopologyLevel_t()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyCommonAncestor")
+    ret = fn(device1, device2, byref(c_level))
+    _nvmlCheckReturn(ret)
+    return c_level.value
+
+#DeepFaceLab additions
+def nvmlDeviceGetCudaComputeCapability(device):
+    c_major = c_int()
+    c_minor = c_int()
+
+    try:
+        fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")
+    except:
+        # older drivers do not export this entry point; report a high
+        # capability so the device is not filtered out
+        return 9, 9
+
+    # query the major/minor compute capability
+    ret = fn(device, byref(c_major), byref(c_minor))
+
+    if (ret != NVML_SUCCESS):
+        raise NVMLError(ret)
+    return c_major.value, c_minor.value
\ No newline at end of file
diff --git a/requirements-colab.txt b/requirements-colab.txt
index 5edac84..7c69191 100644
--- a/requirements-colab.txt
+++ b/requirements-colab.txt
@@ -1,10 +1,10 @@
-numpy==1.16.3
-h5py==2.9.0
-Keras==2.2.4
-opencv-python==4.0.0.21
-tensorflow-gpu==1.13.1
-plaidml-keras==0.5.0
-scikit-image
-tqdm
-ffmpeg-python==0.1.17 +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow-gpu==1.13.1 +plaidml-keras==0.5.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 git+https://www.github.com/keras-team/keras-contrib.git \ No newline at end of file diff --git a/requirements-cpu.txt b/requirements-cpu.txt index dcd38c5..6f5f27f 100644 --- a/requirements-cpu.txt +++ b/requirements-cpu.txt @@ -1,9 +1,9 @@ -numpy==1.16.3 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.0.0.21 -tensorflow==1.12.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow==1.12.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 +git+https://www.github.com/keras-team/keras-contrib.git diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 2017ecf..a62ef4a 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -1,11 +1,11 @@ -numpy==1.16.3 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.0.0.21 -tensorflow-gpu==1.14.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow-gpu==1.14.0 +plaidml==0.6.0 +plaidml-keras==0.5.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 +git+https://www.github.com/keras-team/keras-contrib.git diff --git a/requirements-opencl.txt b/requirements-opencl.txt index 12bc049..99a13c1 100644 --- a/requirements-opencl.txt +++ b/requirements-opencl.txt @@ -1,11 +1,11 @@ -numpy==1.16.3 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.0.0.21 -tensorflow==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git +numpy==1.16.3 +h5py==2.9.0 +Keras==2.2.4 +opencv-python==4.0.0.21 +tensorflow==1.12.0 +plaidml==0.6.0 +plaidml-keras==0.5.0 +scikit-image +tqdm +ffmpeg-python==0.1.17 +git+https://www.github.com/keras-team/keras-contrib.git diff --git a/samplelib/Sample.py b/samplelib/Sample.py index cde8b62..d22a869 100644 --- a/samplelib/Sample.py +++ b/samplelib/Sample.py @@ -1,74 +1,74 @@ -from enum import IntEnum -from pathlib import Path - -import cv2 -import numpy as np - -from utils.cv2_utils import * -from utils.DFLJPG import DFLJPG -from utils.DFLPNG import DFLPNG - - -class SampleType(IntEnum): - IMAGE = 0 #raw image - - FACE_BEGIN = 1 - FACE = 1 #aligned face unsorted - FACE_YAW_SORTED = 2 #sorted by yaw - FACE_YAW_SORTED_AS_TARGET = 3 #sorted by yaw and included only yaws which exist in TARGET also automatic mirrored - FACE_TEMPORAL_SORTED = 4 - FACE_END = 4 - - QTY = 5 - -class Sample(object): - def __init__(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, mirror=None, close_target_list=None, fanseg_mask_exist=False): - self.sample_type = sample_type if sample_type is not None else SampleType.IMAGE - self.filename = filename - self.face_type = face_type - self.shape = shape - self.landmarks = np.array(landmarks) if landmarks is not None else None - self.ie_polys = ie_polys - self.pitch_yaw_roll = pitch_yaw_roll - self.source_filename = source_filename - self.mirror = mirror - self.close_target_list = close_target_list - self.fanseg_mask_exist = fanseg_mask_exist - - def copy_and_set(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, 
mirror=None, close_target_list=None, fanseg_mask=None, fanseg_mask_exist=None): - return Sample( - sample_type=sample_type if sample_type is not None else self.sample_type, - filename=filename if filename is not None else self.filename, - face_type=face_type if face_type is not None else self.face_type, - shape=shape if shape is not None else self.shape, - landmarks=landmarks if landmarks is not None else self.landmarks.copy(), - ie_polys=ie_polys if ie_polys is not None else self.ie_polys, - pitch_yaw_roll=pitch_yaw_roll if pitch_yaw_roll is not None else self.pitch_yaw_roll, - source_filename=source_filename if source_filename is not None else self.source_filename, - mirror=mirror if mirror is not None else self.mirror, - close_target_list=close_target_list if close_target_list is not None else self.close_target_list, - fanseg_mask_exist=fanseg_mask_exist if fanseg_mask_exist is not None else self.fanseg_mask_exist) - - def load_bgr(self): - img = cv2_imread (self.filename).astype(np.float32) / 255.0 - if self.mirror: - img = img[:,::-1].copy() - return img - - def load_fanseg_mask(self): - if self.fanseg_mask_exist: - filepath = Path(self.filename) - if filepath.suffix == '.png': - dflimg = DFLPNG.load ( str(filepath) ) - elif filepath.suffix == '.jpg': - dflimg = DFLJPG.load ( str(filepath) ) - else: - dflimg = None - return dflimg.get_fanseg_mask() - - return None - - def get_random_close_target_sample(self): - if self.close_target_list is None: - return None - return self.close_target_list[randint (0, len(self.close_target_list)-1)] +from enum import IntEnum +from pathlib import Path + +import cv2 +import numpy as np + +from utils.cv2_utils import * +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG + + +class SampleType(IntEnum): + IMAGE = 0 #raw image + + FACE_BEGIN = 1 + FACE = 1 #aligned face unsorted + FACE_YAW_SORTED = 2 #sorted by yaw + FACE_YAW_SORTED_AS_TARGET = 3 #sorted by yaw and included only yaws which exist in TARGET also automatic mirrored + FACE_TEMPORAL_SORTED = 4 + FACE_END = 4 + + QTY = 5 + +class Sample(object): + def __init__(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, mirror=None, close_target_list=None, fanseg_mask_exist=False): + self.sample_type = sample_type if sample_type is not None else SampleType.IMAGE + self.filename = filename + self.face_type = face_type + self.shape = shape + self.landmarks = np.array(landmarks) if landmarks is not None else None + self.ie_polys = ie_polys + self.pitch_yaw_roll = pitch_yaw_roll + self.source_filename = source_filename + self.mirror = mirror + self.close_target_list = close_target_list + self.fanseg_mask_exist = fanseg_mask_exist + + def copy_and_set(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, ie_polys=None, pitch_yaw_roll=None, source_filename=None, mirror=None, close_target_list=None, fanseg_mask=None, fanseg_mask_exist=None): + return Sample( + sample_type=sample_type if sample_type is not None else self.sample_type, + filename=filename if filename is not None else self.filename, + face_type=face_type if face_type is not None else self.face_type, + shape=shape if shape is not None else self.shape, + landmarks=landmarks if landmarks is not None else self.landmarks.copy(), + ie_polys=ie_polys if ie_polys is not None else self.ie_polys, + pitch_yaw_roll=pitch_yaw_roll if pitch_yaw_roll is not None else self.pitch_yaw_roll, + source_filename=source_filename if 
source_filename is not None else self.source_filename, + mirror=mirror if mirror is not None else self.mirror, + close_target_list=close_target_list if close_target_list is not None else self.close_target_list, + fanseg_mask_exist=fanseg_mask_exist if fanseg_mask_exist is not None else self.fanseg_mask_exist) + + def load_bgr(self): + img = cv2_imread (self.filename).astype(np.float32) / 255.0 + if self.mirror: + img = img[:,::-1].copy() + return img + + def load_fanseg_mask(self): + if self.fanseg_mask_exist: + filepath = Path(self.filename) + if filepath.suffix == '.png': + dflimg = DFLPNG.load ( str(filepath) ) + elif filepath.suffix == '.jpg': + dflimg = DFLJPG.load ( str(filepath) ) + else: + dflimg = None + return dflimg.get_fanseg_mask() + + return None + + def get_random_close_target_sample(self): + if self.close_target_list is None: + return None + return self.close_target_list[randint (0, len(self.close_target_list)-1)] diff --git a/samplelib/SampleGeneratorBase.py b/samplelib/SampleGeneratorBase.py index 42e9930..dec741e 100644 --- a/samplelib/SampleGeneratorBase.py +++ b/samplelib/SampleGeneratorBase.py @@ -1,24 +1,24 @@ -from pathlib import Path - -''' -You can implement your own SampleGenerator -''' -class SampleGeneratorBase(object): - - - def __init__ (self, samples_path, debug, batch_size): - if samples_path is None: - raise Exception('samples_path is None') - - self.samples_path = Path(samples_path) - self.debug = debug - self.batch_size = 1 if self.debug else batch_size - - #overridable - def __iter__(self): - #implement your own iterator - return self - - def __next__(self): - #implement your own iterator - return None +from pathlib import Path + +''' +You can implement your own SampleGenerator +''' +class SampleGeneratorBase(object): + + + def __init__ (self, samples_path, debug, batch_size): + if samples_path is None: + raise Exception('samples_path is None') + + self.samples_path = Path(samples_path) + self.debug = debug + self.batch_size = 1 if self.debug else batch_size + + #overridable + def __iter__(self): + #implement your own iterator + return self + + def __next__(self): + #implement your own iterator + return None diff --git a/samplelib/SampleGeneratorFace.py b/samplelib/SampleGeneratorFace.py index 095d955..593f8e2 100644 --- a/samplelib/SampleGeneratorFace.py +++ b/samplelib/SampleGeneratorFace.py @@ -1,142 +1,142 @@ -import multiprocessing -import traceback - -import cv2 -import numpy as np - -from facelib import LandmarksProcessor -from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor, - SampleType) -from utils import iter_utils - - -''' -arg -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional) {} opts ] , - ... 
- ] -''' -class SampleGeneratorFace(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, sort_by_yaw=False, sort_by_yaw_target_samples_path=None, random_ct_samples_path=None, sample_process_options=SampleProcessor.Options(), output_sample_types=[], add_sample_idx=False, generators_count=2, generators_random_seed=None, **kwargs): - super().__init__(samples_path, debug, batch_size) - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - self.add_sample_idx = add_sample_idx - - if sort_by_yaw_target_samples_path is not None: - self.sample_type = SampleType.FACE_YAW_SORTED_AS_TARGET - elif sort_by_yaw: - self.sample_type = SampleType.FACE_YAW_SORTED - else: - self.sample_type = SampleType.FACE - - if generators_random_seed is not None and len(generators_random_seed) != generators_count: - raise ValueError("len(generators_random_seed) != generators_count") - - self.generators_random_seed = generators_random_seed - - samples = SampleLoader.load (self.sample_type, self.samples_path, sort_by_yaw_target_samples_path) - - ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path) if random_ct_samples_path is not None else None - self.random_ct_sample_chance = 100 - - if self.debug: - self.generators_count = 1 - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, samples, ct_samples) )] - else: - self.generators_count = min ( generators_count, len(samples) ) - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, samples[i::self.generators_count], ct_samples ) ) for i in range(self.generators_count) ] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, param ): - generator_id, samples, ct_samples = param - - if self.generators_random_seed is not None: - np.random.seed ( self.generators_random_seed[generator_id] ) - - samples_len = len(samples) - samples_idxs = [*range(samples_len)] - - ct_samples_len = len(ct_samples) if ct_samples is not None else 0 - - if len(samples_idxs) == 0: - raise ValueError('No training data provided.') - - if self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - if all ( [ samples[idx] == None for idx in samples_idxs] ): - raise ValueError('Not enough training data. 
Gather more faces!') - - if self.sample_type == SampleType.FACE: - shuffle_idxs = [] - elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - shuffle_idxs = [] - shuffle_idxs_2D = [[]]*samples_len - - while True: - batches = None - for n_batch in range(self.batch_size): - while True: - sample = None - - if self.sample_type == SampleType.FACE: - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle(shuffle_idxs) - - idx = shuffle_idxs.pop() - sample = samples[ idx ] - - elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle(shuffle_idxs) - - idx = shuffle_idxs.pop() - if samples[idx] != None: - if len(shuffle_idxs_2D[idx]) == 0: - a = shuffle_idxs_2D[idx] = [ *range(len(samples[idx])) ] - np.random.shuffle (a) - - idx2 = shuffle_idxs_2D[idx].pop() - sample = samples[idx][idx2] - - idx = (idx << 16) | (idx2 & 0xFFFF) - - if sample is not None: - try: - ct_sample=None - if ct_samples is not None: - if np.random.randint(100) < self.random_ct_sample_chance: - ct_sample=ct_samples[np.random.randint(ct_samples_len)] - - x = SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug, ct_sample=ct_sample) - except: - raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if type(x) != tuple and type(x) != list: - raise Exception('SampleProcessor.process returns NOT tuple/list') - - if batches is None: - batches = [ [] for _ in range(len(x)) ] - if self.add_sample_idx: - batches += [ [] ] - i_sample_idx = len(batches)-1 - - for i in range(len(x)): - batches[i].append ( x[i] ) - - if self.add_sample_idx: - batches[i_sample_idx].append (idx) - - break - yield [ np.array(batch) for batch in batches] +import multiprocessing +import traceback + +import cv2 +import numpy as np + +from facelib import LandmarksProcessor +from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor, + SampleType) +from utils import iter_utils + + +''' +arg +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional) {} opts ] , + ... 
+ ] +''' +class SampleGeneratorFace(SampleGeneratorBase): + def __init__ (self, samples_path, debug, batch_size, sort_by_yaw=False, sort_by_yaw_target_samples_path=None, random_ct_samples_path=None, sample_process_options=SampleProcessor.Options(), output_sample_types=[], add_sample_idx=False, generators_count=2, generators_random_seed=None, **kwargs): + super().__init__(samples_path, debug, batch_size) + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + self.add_sample_idx = add_sample_idx + + if sort_by_yaw_target_samples_path is not None: + self.sample_type = SampleType.FACE_YAW_SORTED_AS_TARGET + elif sort_by_yaw: + self.sample_type = SampleType.FACE_YAW_SORTED + else: + self.sample_type = SampleType.FACE + + if generators_random_seed is not None and len(generators_random_seed) != generators_count: + raise ValueError("len(generators_random_seed) != generators_count") + + self.generators_random_seed = generators_random_seed + + samples = SampleLoader.load (self.sample_type, self.samples_path, sort_by_yaw_target_samples_path) + + ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path) if random_ct_samples_path is not None else None + self.random_ct_sample_chance = 100 + + if self.debug: + self.generators_count = 1 + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, samples, ct_samples) )] + else: + self.generators_count = min ( generators_count, len(samples) ) + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, samples[i::self.generators_count], ct_samples ) ) for i in range(self.generators_count) ] + + self.generator_counter = -1 + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, param ): + generator_id, samples, ct_samples = param + + if self.generators_random_seed is not None: + np.random.seed ( self.generators_random_seed[generator_id] ) + + samples_len = len(samples) + samples_idxs = [*range(samples_len)] + + ct_samples_len = len(ct_samples) if ct_samples is not None else 0 + + if len(samples_idxs) == 0: + raise ValueError('No training data provided.') + + if self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + if all ( [ samples[idx] == None for idx in samples_idxs] ): + raise ValueError('Not enough training data. 
Gather more faces!') + + if self.sample_type == SampleType.FACE: + shuffle_idxs = [] + elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + shuffle_idxs = [] + shuffle_idxs_2D = [[]]*samples_len + + while True: + batches = None + for n_batch in range(self.batch_size): + while True: + sample = None + + if self.sample_type == SampleType.FACE: + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle(shuffle_idxs) + + idx = shuffle_idxs.pop() + sample = samples[ idx ] + + elif self.sample_type == SampleType.FACE_YAW_SORTED or self.sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle(shuffle_idxs) + + idx = shuffle_idxs.pop() + if samples[idx] != None: + if len(shuffle_idxs_2D[idx]) == 0: + a = shuffle_idxs_2D[idx] = [ *range(len(samples[idx])) ] + np.random.shuffle (a) + + idx2 = shuffle_idxs_2D[idx].pop() + sample = samples[idx][idx2] + + idx = (idx << 16) | (idx2 & 0xFFFF) + + if sample is not None: + try: + ct_sample=None + if ct_samples is not None: + if np.random.randint(100) < self.random_ct_sample_chance: + ct_sample=ct_samples[np.random.randint(ct_samples_len)] + + x = SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug, ct_sample=ct_sample) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if type(x) != tuple and type(x) != list: + raise Exception('SampleProcessor.process returns NOT tuple/list') + + if batches is None: + batches = [ [] for _ in range(len(x)) ] + if self.add_sample_idx: + batches += [ [] ] + i_sample_idx = len(batches)-1 + + for i in range(len(x)): + batches[i].append ( x[i] ) + + if self.add_sample_idx: + batches[i_sample_idx].append (idx) + + break + yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleGeneratorFaceTemporal.py b/samplelib/SampleGeneratorFaceTemporal.py index 559cbb7..cf670d2 100644 --- a/samplelib/SampleGeneratorFaceTemporal.py +++ b/samplelib/SampleGeneratorFaceTemporal.py @@ -1,84 +1,84 @@ -import traceback -import numpy as np -import cv2 - -from utils import iter_utils - -from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase - -''' -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , - ... 
- ] -''' -class SampleGeneratorFaceTemporal(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], generators_count=2, **kwargs): - super().__init__(samples_path, debug, batch_size) - - self.temporal_image_count = temporal_image_count - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - - self.samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path) - - if self.debug: - self.generators_count = 1 - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] - else: - self.generators_count = min ( generators_count, len(self.samples) ) - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, i ) for i in range(self.generators_count) ] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, generator_id): - samples = self.samples - samples_len = len(samples) - if samples_len == 0: - raise ValueError('No training data provided.') - - mult_max = 1 - l = samples_len - (self.temporal_image_count-1)*mult_max + 1 - - samples_idxs = [ *range(l) ] [generator_id::self.generators_count] - - if len(samples_idxs) - self.temporal_image_count < 0: - raise ValueError('Not enough samples to fit temporal line.') - - shuffle_idxs = [] - - while True: - - batches = None - for n_batch in range(self.batch_size): - - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle (shuffle_idxs) - - idx = shuffle_idxs.pop() - - temporal_samples = [] - mult = np.random.randint(mult_max) - for i in range( self.temporal_image_count ): - sample = samples[ idx+i*mult ] - try: - temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) - except: - raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if batches is None: - batches = [ [] for _ in range(len(temporal_samples)) ] - - for i in range(len(temporal_samples)): - batches[i].append ( temporal_samples[i] ) - - yield [ np.array(batch) for batch in batches] +import traceback +import numpy as np +import cv2 + +from utils import iter_utils + +from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase + +''' +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , + ... 
+ ] +''' +class SampleGeneratorFaceTemporal(SampleGeneratorBase): + def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], generators_count=2, **kwargs): + super().__init__(samples_path, debug, batch_size) + + self.temporal_image_count = temporal_image_count + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + + self.samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path) + + if self.debug: + self.generators_count = 1 + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] + else: + self.generators_count = min ( generators_count, len(self.samples) ) + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, i ) for i in range(self.generators_count) ] + + self.generator_counter = -1 + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, generator_id): + samples = self.samples + samples_len = len(samples) + if samples_len == 0: + raise ValueError('No training data provided.') + + mult_max = 1 + l = samples_len - (self.temporal_image_count-1)*mult_max + 1 + + samples_idxs = [ *range(l) ] [generator_id::self.generators_count] + + if len(samples_idxs) - self.temporal_image_count < 0: + raise ValueError('Not enough samples to fit temporal line.') + + shuffle_idxs = [] + + while True: + + batches = None + for n_batch in range(self.batch_size): + + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle (shuffle_idxs) + + idx = shuffle_idxs.pop() + + temporal_samples = [] + mult = np.random.randint(mult_max) + for i in range( self.temporal_image_count ): + sample = samples[ idx+i*mult ] + try: + temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if batches is None: + batches = [ [] for _ in range(len(temporal_samples)) ] + + for i in range(len(temporal_samples)): + batches[i].append ( temporal_samples[i] ) + + yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleGeneratorImageTemporal.py b/samplelib/SampleGeneratorImageTemporal.py index 0e5b238..190f98d 100644 --- a/samplelib/SampleGeneratorImageTemporal.py +++ b/samplelib/SampleGeneratorImageTemporal.py @@ -1,78 +1,78 @@ -import traceback -import numpy as np -import cv2 - -from utils import iter_utils - -from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase - -''' -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , - ... 
- ] -''' -class SampleGeneratorImageTemporal(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], **kwargs): - super().__init__(samples_path, debug, batch_size) - - self.temporal_image_count = temporal_image_count - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - - self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path) - - self.generator_samples = [ self.samples ] - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \ - [iter_utils.SubprocessGenerator ( self.batch_func, 0 )] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, generator_id): - samples = self.generator_samples[generator_id] - samples_len = len(samples) - if samples_len == 0: - raise ValueError('No training data provided.') - - mult_max = 4 - samples_sub_len = samples_len - (self.temporal_image_count-1)*mult_max - if samples_sub_len <= 0: - raise ValueError('Not enough samples to fit temporal line.') - - shuffle_idxs = [] - - while True: - - batches = None - for n_batch in range(self.batch_size): - - if len(shuffle_idxs) == 0: - shuffle_idxs = [ *range(samples_sub_len) ] - np.random.shuffle (shuffle_idxs) - - idx = shuffle_idxs.pop() - - temporal_samples = [] - mult = np.random.randint(mult_max) - for i in range( self.temporal_image_count ): - sample = samples[ idx+i*mult ] - try: - temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) - except: - raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if batches is None: - batches = [ [] for _ in range(len(temporal_samples)) ] - - for i in range(len(temporal_samples)): - batches[i].append ( temporal_samples[i] ) - - yield [ np.array(batch) for batch in batches] +import traceback +import numpy as np +import cv2 + +from utils import iter_utils + +from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGeneratorBase + +''' +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional)random_sub_size] , + ... 
+ ] +''' +class SampleGeneratorImageTemporal(SampleGeneratorBase): + def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], **kwargs): + super().__init__(samples_path, debug, batch_size) + + self.temporal_image_count = temporal_image_count + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + + self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path) + + self.generator_samples = [ self.samples ] + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \ + [iter_utils.SubprocessGenerator ( self.batch_func, 0 )] + + self.generator_counter = -1 + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, generator_id): + samples = self.generator_samples[generator_id] + samples_len = len(samples) + if samples_len == 0: + raise ValueError('No training data provided.') + + mult_max = 4 + samples_sub_len = samples_len - (self.temporal_image_count-1)*mult_max + if samples_sub_len <= 0: + raise ValueError('Not enough samples to fit temporal line.') + + shuffle_idxs = [] + + while True: + + batches = None + for n_batch in range(self.batch_size): + + if len(shuffle_idxs) == 0: + shuffle_idxs = [ *range(samples_sub_len) ] + np.random.shuffle (shuffle_idxs) + + idx = shuffle_idxs.pop() + + temporal_samples = [] + mult = np.random.randint(mult_max) + for i in range( self.temporal_image_count ): + sample = samples[ idx+i*mult ] + try: + temporal_samples += SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. 
Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if batches is None: + batches = [ [] for _ in range(len(temporal_samples)) ] + + for i in range(len(temporal_samples)): + batches[i].append ( temporal_samples[i] ) + + yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleLoader.py b/samplelib/SampleLoader.py index 42e48be..762171f 100644 --- a/samplelib/SampleLoader.py +++ b/samplelib/SampleLoader.py @@ -1,152 +1,152 @@ -import operator -import traceback -from enum import IntEnum -from pathlib import Path - -import cv2 -import numpy as np - -from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from utils import Path_utils -from utils.DFLJPG import DFLJPG -from utils.DFLPNG import DFLPNG - -from .Sample import Sample, SampleType - - -class SampleLoader: - cache = dict() - - @staticmethod - def load(sample_type, samples_path, target_samples_path=None): - cache = SampleLoader.cache - - if str(samples_path) not in cache.keys(): - cache[str(samples_path)] = [None]*SampleType.QTY - - datas = cache[str(samples_path)] - - if sample_type == SampleType.IMAGE: - if datas[sample_type] is None: - datas[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( Path_utils.get_image_paths(samples_path), "Loading") ] - - elif sample_type == SampleType.FACE: - if datas[sample_type] is None: - datas[sample_type] = SampleLoader.upgradeToFaceSamples( [ Sample(filename=filename) for filename in Path_utils.get_image_paths(samples_path) ] ) - - elif sample_type == SampleType.FACE_TEMPORAL_SORTED: - if datas[sample_type] is None: - datas[sample_type] = SampleLoader.upgradeToFaceTemporalSortedSamples( SampleLoader.load(SampleType.FACE, samples_path) ) - - elif sample_type == SampleType.FACE_YAW_SORTED: - if datas[sample_type] is None: - datas[sample_type] = SampleLoader.upgradeToFaceYawSortedSamples( SampleLoader.load(SampleType.FACE, samples_path) ) - - elif sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: - if datas[sample_type] is None: - if target_samples_path is None: - raise Exception('target_samples_path is None for FACE_YAW_SORTED_AS_TARGET') - datas[sample_type] = SampleLoader.upgradeToFaceYawSortedAsTargetSamples( SampleLoader.load(SampleType.FACE_YAW_SORTED, samples_path), SampleLoader.load(SampleType.FACE_YAW_SORTED, target_samples_path) ) - - return datas[sample_type] - - @staticmethod - def upgradeToFaceSamples ( samples ): - sample_list = [] - - for s in io.progress_bar_generator(samples, "Loading"): - s_filename_path = Path(s.filename) - try: - if s_filename_path.suffix == '.png': - dflimg = DFLPNG.load ( str(s_filename_path) ) - elif s_filename_path.suffix == '.jpg': - dflimg = DFLJPG.load ( str(s_filename_path) ) - else: - dflimg = None - - if dflimg is None: - print ("%s is not a dfl image file required for training" % (s_filename_path.name) ) - continue - - landmarks = dflimg.get_landmarks() - pitch_yaw_roll = dflimg.get_pitch_yaw_roll() - if pitch_yaw_roll is None: - pitch_yaw_roll = LandmarksProcessor.estimate_pitch_yaw_roll(landmarks) - - - sample_list.append( s.copy_and_set(sample_type=SampleType.FACE, - face_type=FaceType.fromString (dflimg.get_face_type()), - shape=dflimg.get_shape(), - landmarks=landmarks, - ie_polys=dflimg.get_ie_polys(), - pitch_yaw_roll=pitch_yaw_roll, - source_filename=dflimg.get_source_filename(), - fanseg_mask_exist=dflimg.get_fanseg_mask() is not None, ) ) - except: - print ("Unable to load %s , error: %s" % (str(s_filename_path), traceback.format_exc() ) ) - - return 
sample_list - - @staticmethod - def upgradeToFaceTemporalSortedSamples( samples ): - new_s = [ (s, s.source_filename) for s in samples] - new_s = sorted(new_s, key=operator.itemgetter(1)) - - return [ s[0] for s in new_s] - - @staticmethod - def upgradeToFaceYawSortedSamples( samples ): - - lowest_yaw, highest_yaw = -1.0, 1.0 - gradations = 64 - diff_rot_per_grad = abs(highest_yaw-lowest_yaw) / gradations - - yaws_sample_list = [None]*gradations - - for i in io.progress_bar_generator(range(gradations), "Sorting"): - yaw = lowest_yaw + i*diff_rot_per_grad - next_yaw = lowest_yaw + (i+1)*diff_rot_per_grad - - yaw_samples = [] - for s in samples: - s_yaw = s.pitch_yaw_roll[1] - if (i == 0 and s_yaw < next_yaw) or \ - (i < gradations-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ - (i == gradations-1 and s_yaw >= yaw): - yaw_samples.append ( s.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED) ) - - if len(yaw_samples) > 0: - yaws_sample_list[i] = yaw_samples - - return yaws_sample_list - - @staticmethod - def upgradeToFaceYawSortedAsTargetSamples (s, t): - l = len(s) - if l != len(t): - raise Exception('upgradeToFaceYawSortedAsTargetSamples() s_len != t_len') - b = l // 2 - - s_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in s] ) == 1 )[:,0] - t_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in t] ) == 1 )[:,0] - - new_s = [None]*l - - for t_idx in t_idxs: - search_idxs = [] - for i in range(0,l): - search_idxs += [t_idx - i, (l-t_idx-1) - i, t_idx + i, (l-t_idx-1) + i] - - for search_idx in search_idxs: - if search_idx in s_idxs: - mirrored = ( t_idx != search_idx and ((t_idx < b and search_idx >= b) or (search_idx < b and t_idx >= b)) ) - new_s[t_idx] = [ sample.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED_AS_TARGET, - mirror=True, - pitch_yaw_roll=(sample.pitch_yaw_roll[0],-sample.pitch_yaw_roll[1],sample.pitch_yaw_roll[2]), - landmarks=LandmarksProcessor.mirror_landmarks (sample.landmarks, sample.shape[1] )) - for sample in s[search_idx] - ] if mirrored else s[search_idx] - break - - return new_s +import operator +import traceback +from enum import IntEnum +from pathlib import Path + +import cv2 +import numpy as np + +from facelib import FaceType, LandmarksProcessor +from interact import interact as io +from utils import Path_utils +from utils.DFLJPG import DFLJPG +from utils.DFLPNG import DFLPNG + +from .Sample import Sample, SampleType + + +class SampleLoader: + cache = dict() + + @staticmethod + def load(sample_type, samples_path, target_samples_path=None): + cache = SampleLoader.cache + + if str(samples_path) not in cache.keys(): + cache[str(samples_path)] = [None]*SampleType.QTY + + datas = cache[str(samples_path)] + + if sample_type == SampleType.IMAGE: + if datas[sample_type] is None: + datas[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( Path_utils.get_image_paths(samples_path), "Loading") ] + + elif sample_type == SampleType.FACE: + if datas[sample_type] is None: + datas[sample_type] = SampleLoader.upgradeToFaceSamples( [ Sample(filename=filename) for filename in Path_utils.get_image_paths(samples_path) ] ) + + elif sample_type == SampleType.FACE_TEMPORAL_SORTED: + if datas[sample_type] is None: + datas[sample_type] = SampleLoader.upgradeToFaceTemporalSortedSamples( SampleLoader.load(SampleType.FACE, samples_path) ) + + elif sample_type == SampleType.FACE_YAW_SORTED: + if datas[sample_type] is None: + datas[sample_type] = SampleLoader.upgradeToFaceYawSortedSamples( SampleLoader.load(SampleType.FACE, 
samples_path) ) + + elif sample_type == SampleType.FACE_YAW_SORTED_AS_TARGET: + if datas[sample_type] is None: + if target_samples_path is None: + raise Exception('target_samples_path is None for FACE_YAW_SORTED_AS_TARGET') + datas[sample_type] = SampleLoader.upgradeToFaceYawSortedAsTargetSamples( SampleLoader.load(SampleType.FACE_YAW_SORTED, samples_path), SampleLoader.load(SampleType.FACE_YAW_SORTED, target_samples_path) ) + + return datas[sample_type] + + @staticmethod + def upgradeToFaceSamples ( samples ): + sample_list = [] + + for s in io.progress_bar_generator(samples, "Loading"): + s_filename_path = Path(s.filename) + try: + if s_filename_path.suffix == '.png': + dflimg = DFLPNG.load ( str(s_filename_path) ) + elif s_filename_path.suffix == '.jpg': + dflimg = DFLJPG.load ( str(s_filename_path) ) + else: + dflimg = None + + if dflimg is None: + print ("%s is not a dfl image file required for training" % (s_filename_path.name) ) + continue + + landmarks = dflimg.get_landmarks() + pitch_yaw_roll = dflimg.get_pitch_yaw_roll() + if pitch_yaw_roll is None: + pitch_yaw_roll = LandmarksProcessor.estimate_pitch_yaw_roll(landmarks) + + + sample_list.append( s.copy_and_set(sample_type=SampleType.FACE, + face_type=FaceType.fromString (dflimg.get_face_type()), + shape=dflimg.get_shape(), + landmarks=landmarks, + ie_polys=dflimg.get_ie_polys(), + pitch_yaw_roll=pitch_yaw_roll, + source_filename=dflimg.get_source_filename(), + fanseg_mask_exist=dflimg.get_fanseg_mask() is not None, ) ) + except: + print ("Unable to load %s , error: %s" % (str(s_filename_path), traceback.format_exc() ) ) + + return sample_list + + @staticmethod + def upgradeToFaceTemporalSortedSamples( samples ): + new_s = [ (s, s.source_filename) for s in samples] + new_s = sorted(new_s, key=operator.itemgetter(1)) + + return [ s[0] for s in new_s] + + @staticmethod + def upgradeToFaceYawSortedSamples( samples ): + + lowest_yaw, highest_yaw = -1.0, 1.0 + gradations = 64 + diff_rot_per_grad = abs(highest_yaw-lowest_yaw) / gradations + + yaws_sample_list = [None]*gradations + + for i in io.progress_bar_generator(range(gradations), "Sorting"): + yaw = lowest_yaw + i*diff_rot_per_grad + next_yaw = lowest_yaw + (i+1)*diff_rot_per_grad + + yaw_samples = [] + for s in samples: + s_yaw = s.pitch_yaw_roll[1] + if (i == 0 and s_yaw < next_yaw) or \ + (i < gradations-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ + (i == gradations-1 and s_yaw >= yaw): + yaw_samples.append ( s.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED) ) + + if len(yaw_samples) > 0: + yaws_sample_list[i] = yaw_samples + + return yaws_sample_list + + @staticmethod + def upgradeToFaceYawSortedAsTargetSamples (s, t): + l = len(s) + if l != len(t): + raise Exception('upgradeToFaceYawSortedAsTargetSamples() s_len != t_len') + b = l // 2 + + s_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in s] ) == 1 )[:,0] + t_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in t] ) == 1 )[:,0] + + new_s = [None]*l + + for t_idx in t_idxs: + search_idxs = [] + for i in range(0,l): + search_idxs += [t_idx - i, (l-t_idx-1) - i, t_idx + i, (l-t_idx-1) + i] + + for search_idx in search_idxs: + if search_idx in s_idxs: + mirrored = ( t_idx != search_idx and ((t_idx < b and search_idx >= b) or (search_idx < b and t_idx >= b)) ) + new_s[t_idx] = [ sample.copy_and_set(sample_type=SampleType.FACE_YAW_SORTED_AS_TARGET, + mirror=True, + pitch_yaw_roll=(sample.pitch_yaw_roll[0],-sample.pitch_yaw_roll[1],sample.pitch_yaw_roll[2]), + 
landmarks=LandmarksProcessor.mirror_landmarks (sample.landmarks, sample.shape[1] )) + for sample in s[search_idx] + ] if mirrored else s[search_idx] + break + + return new_s diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py index bab51c9..0921e52 100644 --- a/samplelib/SampleProcessor.py +++ b/samplelib/SampleProcessor.py @@ -1,316 +1,316 @@ -import collections -from enum import IntEnum - -import cv2 -import numpy as np - -import imagelib -from facelib import FaceType, LandmarksProcessor - - -""" -output_sample_types = [ - {} opts, - ... - ] - -opts: - 'types' : (S,S,...,S) - where S: - 'IMG_SOURCE' - 'IMG_WARPED' - 'IMG_WARPED_TRANSFORMED'' - 'IMG_TRANSFORMED' - 'IMG_LANDMARKS_ARRAY' #currently unused - 'IMG_PITCH_YAW_ROLL' - - 'FACE_TYPE_HALF' - 'FACE_TYPE_FULL' - 'FACE_TYPE_HEAD' #currently unused - 'FACE_TYPE_AVATAR' #currently unused - - 'MODE_BGR' #BGR - 'MODE_G' #Grayscale - 'MODE_GGG' #3xGrayscale - 'MODE_M' #mask only - 'MODE_BGR_SHUFFLE' #BGR shuffle - - 'resolution' : N - 'motion_blur' : (chance_int, range) - chance 0..100 to apply to face (not mask), and range [1..3] where 3 is highest power of motion blur - 'apply_ct' : bool - 'normalize_tanh' : bool - -""" - -class SampleProcessor(object): - class Types(IntEnum): - NONE = 0 - - IMG_TYPE_BEGIN = 1 - IMG_SOURCE = 1 - IMG_WARPED = 2 - IMG_WARPED_TRANSFORMED = 3 - IMG_TRANSFORMED = 4 - IMG_LANDMARKS_ARRAY = 5 #currently unused - IMG_PITCH_YAW_ROLL = 6 - IMG_PITCH_YAW_ROLL_SIGMOID = 7 - IMG_TYPE_END = 10 - - FACE_TYPE_BEGIN = 10 - FACE_TYPE_HALF = 10 - FACE_TYPE_FULL = 11 - FACE_TYPE_HEAD = 12 #currently unused - FACE_TYPE_AVATAR = 13 #currently unused - FACE_TYPE_END = 20 - - MODE_BEGIN = 40 - MODE_BGR = 40 #BGR - MODE_G = 41 #Grayscale - MODE_GGG = 42 #3xGrayscale - MODE_M = 43 #mask only - MODE_BGR_SHUFFLE = 44 #BGR shuffle - MODE_END = 50 - - class Options(object): - - def __init__(self, random_flip = True, rotation_range=[-10,10], scale_range=[-0.05, 0.05], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): - self.random_flip = random_flip - self.rotation_range = rotation_range - self.scale_range = scale_range - self.tx_range = tx_range - self.ty_range = ty_range - - @staticmethod - def process (sample, sample_process_options, output_sample_types, debug, ct_sample=None): - SPTF = SampleProcessor.Types - - sample_bgr = sample.load_bgr() - ct_sample_bgr = None - ct_sample_mask = None - h,w,c = sample_bgr.shape - - is_face_sample = sample.landmarks is not None - - if debug and is_face_sample: - LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0)) - - params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range ) - - cached_images = collections.defaultdict(dict) - - sample_rnd_seed = np.random.randint(0x80000000) - - SPTF_FACETYPE_TO_FACETYPE = { SPTF.FACE_TYPE_HALF : FaceType.HALF, - SPTF.FACE_TYPE_FULL : FaceType.FULL, - SPTF.FACE_TYPE_HEAD : FaceType.HEAD, - SPTF.FACE_TYPE_AVATAR : FaceType.AVATAR } - - outputs = [] - for opts in output_sample_types: - - resolution = opts.get('resolution', 0) - types = opts.get('types', [] ) - - random_sub_res = opts.get('random_sub_res', 0) - normalize_std_dev = opts.get('normalize_std_dev', False) - normalize_vgg = opts.get('normalize_vgg', False) - motion_blur = opts.get('motion_blur', None) - apply_ct = opts.get('apply_ct', False) - 
normalize_tanh = opts.get('normalize_tanh', False) - - img_type = SPTF.NONE - target_face_type = SPTF.NONE - face_mask_type = SPTF.NONE - mode_type = SPTF.NONE - for t in types: - if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END: - img_type = t - elif t >= SPTF.FACE_TYPE_BEGIN and t < SPTF.FACE_TYPE_END: - target_face_type = t - elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END: - mode_type = t - - if img_type == SPTF.NONE: - raise ValueError ('expected IMG_ type') - - if img_type == SPTF.IMG_LANDMARKS_ARRAY: - l = sample.landmarks - l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 ) - l = np.clip(l, 0.0, 1.0) - img = l - elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: - pitch_yaw_roll = sample.pitch_yaw_roll - if pitch_yaw_roll is not None: - pitch, yaw, roll = pitch_yaw_roll - else: - pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll (sample.landmarks) - if params['flip']: - yaw = -yaw - - if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: - pitch = (pitch+1.0) / 2.0 - yaw = (yaw+1.0) / 2.0 - roll = (roll+1.0) / 2.0 - - img = (pitch, yaw, roll) - else: - if mode_type == SPTF.NONE: - raise ValueError ('expected MODE_ type') - - img = cached_images.get(img_type, None) - if img is None: - - img = sample_bgr - mask = None - cur_sample = sample - - if is_face_sample: - if motion_blur is not None: - chance, mb_range = motion_blur - chance = np.clip(chance, 0, 100) - - if np.random.randint(100) < chance: - mb_range = [3,5,7,9][ : np.clip(mb_range, 0, 3)+1 ] - dim = mb_range[ np.random.randint(len(mb_range) ) ] - img = imagelib.LinearMotionBlur (img, dim, np.random.randint(180) ) - - mask = cur_sample.load_fanseg_mask() #using fanseg_mask if exist - - if mask is None: - mask = LandmarksProcessor.get_image_hull_mask (img.shape, cur_sample.landmarks) - - if cur_sample.ie_polys is not None: - cur_sample.ie_polys.overlay_mask(mask) - - warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED) - transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED) - flip = img_type != SPTF.IMG_WARPED - - img = imagelib.warp_by_params (params, img, warp, transform, flip, True) - if mask is not None: - mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)[...,np.newaxis] - img = np.concatenate( (img, mask ), -1 ) - - cached_images[img_type] = img - - if is_face_sample and target_face_type != SPTF.NONE: - ft = SPTF_FACETYPE_TO_FACETYPE[target_face_type] - if ft > sample.face_type: - raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' 
% (sample.filename, sample.face_type, ft) ) - img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, ft), (resolution,resolution), flags=cv2.INTER_CUBIC ) - else: - img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC ) - - if random_sub_res != 0: - sub_size = resolution - random_sub_res - rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res) - start_x = rnd_state.randint(sub_size+1) - start_y = rnd_state.randint(sub_size+1) - img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:] - - img = np.clip(img, 0, 1) - img_bgr = img[...,0:3] - img_mask = img[...,3:4] - - if apply_ct and ct_sample is not None: - if ct_sample_bgr is None: - ct_sample_bgr = ct_sample.load_bgr() - - ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) - - img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized) - img_bgr = np.clip( img_bgr, 0.0, 1.0) - - if normalize_std_dev: - img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) ) - elif normalize_vgg: - img_bgr = np.clip(img_bgr*255, 0, 255) - img_bgr[:,:,0] -= 103.939 - img_bgr[:,:,1] -= 116.779 - img_bgr[:,:,2] -= 123.68 - - if mode_type == SPTF.MODE_BGR: - img = img_bgr - elif mode_type == SPTF.MODE_BGR_SHUFFLE: - rnd_state = np.random.RandomState (sample_rnd_seed) - img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1) - elif mode_type == SPTF.MODE_G: - img = np.concatenate ( (np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1),img_mask) , -1 ) - elif mode_type == SPTF.MODE_GGG: - img = np.concatenate ( ( np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1), img_mask), -1) - elif mode_type == SPTF.MODE_M and is_face_sample: - img = img_mask - - if not debug: - if normalize_tanh: - img = np.clip (img * 2.0 - 1.0, -1.0, 1.0) - else: - img = np.clip (img, 0.0, 1.0) - - outputs.append ( img ) - - if debug: - result = [] - - for output in outputs: - if output.shape[2] < 4: - result += [output,] - elif output.shape[2] == 4: - result += [output[...,0:3]*output[...,3:4],] - - return result - else: - return outputs - -""" - close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None - close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None - - if debug and close_sample_bgr is not None: - LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0)) - RANDOM_CLOSE = 0x00000040, #currently unused - MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused - -if f & SPTF.RANDOM_CLOSE != 0: - img_type += 10 - elif f & SPTF.MORPH_TO_RANDOM_CLOSE != 0: - img_type += 20 -if img_type >= 10 and img_type <= 19: #RANDOM_CLOSE - img_type -= 10 - img = close_sample_bgr - cur_sample = close_sample - -elif img_type >= 20 and img_type <= 29: #MORPH_TO_RANDOM_CLOSE - img_type -= 20 - res = sample.shape[0] - - s_landmarks = sample.landmarks.copy() - d_landmarks = close_sample.landmarks.copy() - idxs = list(range(len(s_landmarks))) - #remove landmarks near boundaries - for i in idxs[:]: - s_l = s_landmarks[i] - d_l = d_landmarks[i] - if s_l[0] < 5 or s_l[1] < 5 or s_l[0] >= res-5 or s_l[1] >= res-5 or \ - d_l[0] < 5 or d_l[1] < 5 or d_l[0] >= res-5 or d_l[1] >= res-5: - idxs.remove(i) - #remove landmarks that close to each other in 5 dist - for landmarks in [s_landmarks, d_landmarks]: - for i in idxs[:]: - s_l = landmarks[i] - for j in idxs[:]: - if i == j: - 
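The two normalization branches above differ in kind: normalize_std_dev standardizes each channel of the 0..1 image to zero mean and unit variance, while normalize_vgg rescales to 0..255 and subtracts the standard VGG BGR channel means. The same arithmetic as a standalone sketch:

    import numpy as np
    img_bgr = np.random.rand(128, 128, 3).astype(np.float32)  # BGR in 0..1

    # normalize_std_dev: per-channel standardization
    std_img = (img_bgr - img_bgr.mean((0, 1))) / img_bgr.std((0, 1))

    # normalize_vgg: 0..255 range minus the ImageNet BGR channel means
    vgg_img = np.clip(img_bgr * 255, 0, 255) - np.array([103.939, 116.779, 123.68], np.float32)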
continue - s_l_2 = landmarks[j] - diff_l = np.abs(s_l - s_l_2) - if np.sqrt(diff_l.dot(diff_l)) < 5: - idxs.remove(i) - break - s_landmarks = s_landmarks[idxs] - d_landmarks = d_landmarks[idxs] - s_landmarks = np.concatenate ( [s_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) - d_landmarks = np.concatenate ( [d_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) - img = imagelib.morph_by_points (sample_bgr, s_landmarks, d_landmarks) - cur_sample = close_sample -else: - """ +import collections +from enum import IntEnum + +import cv2 +import numpy as np + +import imagelib +from facelib import FaceType, LandmarksProcessor + + +""" +output_sample_types = [ + {} opts, + ... + ] + +opts: + 'types' : (S,S,...,S) + where S: + 'IMG_SOURCE' + 'IMG_WARPED' + 'IMG_WARPED_TRANSFORMED'' + 'IMG_TRANSFORMED' + 'IMG_LANDMARKS_ARRAY' #currently unused + 'IMG_PITCH_YAW_ROLL' + + 'FACE_TYPE_HALF' + 'FACE_TYPE_FULL' + 'FACE_TYPE_HEAD' #currently unused + 'FACE_TYPE_AVATAR' #currently unused + + 'MODE_BGR' #BGR + 'MODE_G' #Grayscale + 'MODE_GGG' #3xGrayscale + 'MODE_M' #mask only + 'MODE_BGR_SHUFFLE' #BGR shuffle + + 'resolution' : N + 'motion_blur' : (chance_int, range) - chance 0..100 to apply to face (not mask), and range [1..3] where 3 is highest power of motion blur + 'apply_ct' : bool + 'normalize_tanh' : bool + +""" + +class SampleProcessor(object): + class Types(IntEnum): + NONE = 0 + + IMG_TYPE_BEGIN = 1 + IMG_SOURCE = 1 + IMG_WARPED = 2 + IMG_WARPED_TRANSFORMED = 3 + IMG_TRANSFORMED = 4 + IMG_LANDMARKS_ARRAY = 5 #currently unused + IMG_PITCH_YAW_ROLL = 6 + IMG_PITCH_YAW_ROLL_SIGMOID = 7 + IMG_TYPE_END = 10 + + FACE_TYPE_BEGIN = 10 + FACE_TYPE_HALF = 10 + FACE_TYPE_FULL = 11 + FACE_TYPE_HEAD = 12 #currently unused + FACE_TYPE_AVATAR = 13 #currently unused + FACE_TYPE_END = 20 + + MODE_BEGIN = 40 + MODE_BGR = 40 #BGR + MODE_G = 41 #Grayscale + MODE_GGG = 42 #3xGrayscale + MODE_M = 43 #mask only + MODE_BGR_SHUFFLE = 44 #BGR shuffle + MODE_END = 50 + + class Options(object): + + def __init__(self, random_flip = True, rotation_range=[-10,10], scale_range=[-0.05, 0.05], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): + self.random_flip = random_flip + self.rotation_range = rotation_range + self.scale_range = scale_range + self.tx_range = tx_range + self.ty_range = ty_range + + @staticmethod + def process (sample, sample_process_options, output_sample_types, debug, ct_sample=None): + SPTF = SampleProcessor.Types + + sample_bgr = sample.load_bgr() + ct_sample_bgr = None + ct_sample_mask = None + h,w,c = sample_bgr.shape + + is_face_sample = sample.landmarks is not None + + if debug and is_face_sample: + LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0)) + + params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range ) + + cached_images = collections.defaultdict(dict) + + sample_rnd_seed = np.random.randint(0x80000000) + + SPTF_FACETYPE_TO_FACETYPE = { SPTF.FACE_TYPE_HALF : FaceType.HALF, + SPTF.FACE_TYPE_FULL : FaceType.FULL, + SPTF.FACE_TYPE_HEAD : FaceType.HEAD, + SPTF.FACE_TYPE_AVATAR : FaceType.AVATAR } + + outputs = [] + for opts in output_sample_types: + + resolution = opts.get('resolution', 0) + types = opts.get('types', 
[] ) + + random_sub_res = opts.get('random_sub_res', 0) + normalize_std_dev = opts.get('normalize_std_dev', False) + normalize_vgg = opts.get('normalize_vgg', False) + motion_blur = opts.get('motion_blur', None) + apply_ct = opts.get('apply_ct', False) + normalize_tanh = opts.get('normalize_tanh', False) + + img_type = SPTF.NONE + target_face_type = SPTF.NONE + face_mask_type = SPTF.NONE + mode_type = SPTF.NONE + for t in types: + if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END: + img_type = t + elif t >= SPTF.FACE_TYPE_BEGIN and t < SPTF.FACE_TYPE_END: + target_face_type = t + elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END: + mode_type = t + + if img_type == SPTF.NONE: + raise ValueError ('expected IMG_ type') + + if img_type == SPTF.IMG_LANDMARKS_ARRAY: + l = sample.landmarks + l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 ) + l = np.clip(l, 0.0, 1.0) + img = l + elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: + pitch_yaw_roll = sample.pitch_yaw_roll + if pitch_yaw_roll is not None: + pitch, yaw, roll = pitch_yaw_roll + else: + pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll (sample.landmarks) + if params['flip']: + yaw = -yaw + + if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: + pitch = (pitch+1.0) / 2.0 + yaw = (yaw+1.0) / 2.0 + roll = (roll+1.0) / 2.0 + + img = (pitch, yaw, roll) + else: + if mode_type == SPTF.NONE: + raise ValueError ('expected MODE_ type') + + img = cached_images.get(img_type, None) + if img is None: + + img = sample_bgr + mask = None + cur_sample = sample + + if is_face_sample: + if motion_blur is not None: + chance, mb_range = motion_blur + chance = np.clip(chance, 0, 100) + + if np.random.randint(100) < chance: + mb_range = [3,5,7,9][ : np.clip(mb_range, 0, 3)+1 ] + dim = mb_range[ np.random.randint(len(mb_range) ) ] + img = imagelib.LinearMotionBlur (img, dim, np.random.randint(180) ) + + mask = cur_sample.load_fanseg_mask() #using fanseg_mask if exist + + if mask is None: + mask = LandmarksProcessor.get_image_hull_mask (img.shape, cur_sample.landmarks) + + if cur_sample.ie_polys is not None: + cur_sample.ie_polys.overlay_mask(mask) + + warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED) + transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED) + flip = img_type != SPTF.IMG_WARPED + + img = imagelib.warp_by_params (params, img, warp, transform, flip, True) + if mask is not None: + mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)[...,np.newaxis] + img = np.concatenate( (img, mask ), -1 ) + + cached_images[img_type] = img + + if is_face_sample and target_face_type != SPTF.NONE: + ft = SPTF_FACETYPE_TO_FACETYPE[target_face_type] + if ft > sample.face_type: + raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' 
% (sample.filename, sample.face_type, ft) ) + img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, ft), (resolution,resolution), flags=cv2.INTER_CUBIC ) + else: + img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC ) + + if random_sub_res != 0: + sub_size = resolution - random_sub_res + rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res) + start_x = rnd_state.randint(sub_size+1) + start_y = rnd_state.randint(sub_size+1) + img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:] + + img = np.clip(img, 0, 1) + img_bgr = img[...,0:3] + img_mask = img[...,3:4] + + if apply_ct and ct_sample is not None: + if ct_sample_bgr is None: + ct_sample_bgr = ct_sample.load_bgr() + + ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) + + img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized) + img_bgr = np.clip( img_bgr, 0.0, 1.0) + + if normalize_std_dev: + img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) ) + elif normalize_vgg: + img_bgr = np.clip(img_bgr*255, 0, 255) + img_bgr[:,:,0] -= 103.939 + img_bgr[:,:,1] -= 116.779 + img_bgr[:,:,2] -= 123.68 + + if mode_type == SPTF.MODE_BGR: + img = img_bgr + elif mode_type == SPTF.MODE_BGR_SHUFFLE: + rnd_state = np.random.RandomState (sample_rnd_seed) + img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1) + elif mode_type == SPTF.MODE_G: + img = np.concatenate ( (np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1),img_mask) , -1 ) + elif mode_type == SPTF.MODE_GGG: + img = np.concatenate ( ( np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1), img_mask), -1) + elif mode_type == SPTF.MODE_M and is_face_sample: + img = img_mask + + if not debug: + if normalize_tanh: + img = np.clip (img * 2.0 - 1.0, -1.0, 1.0) + else: + img = np.clip (img, 0.0, 1.0) + + outputs.append ( img ) + + if debug: + result = [] + + for output in outputs: + if output.shape[2] < 4: + result += [output,] + elif output.shape[2] == 4: + result += [output[...,0:3]*output[...,3:4],] + + return result + else: + return outputs + +""" + close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None + close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None + + if debug and close_sample_bgr is not None: + LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0)) + RANDOM_CLOSE = 0x00000040, #currently unused + MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused + +if f & SPTF.RANDOM_CLOSE != 0: + img_type += 10 + elif f & SPTF.MORPH_TO_RANDOM_CLOSE != 0: + img_type += 20 +if img_type >= 10 and img_type <= 19: #RANDOM_CLOSE + img_type -= 10 + img = close_sample_bgr + cur_sample = close_sample + +elif img_type >= 20 and img_type <= 29: #MORPH_TO_RANDOM_CLOSE + img_type -= 20 + res = sample.shape[0] + + s_landmarks = sample.landmarks.copy() + d_landmarks = close_sample.landmarks.copy() + idxs = list(range(len(s_landmarks))) + #remove landmarks near boundaries + for i in idxs[:]: + s_l = s_landmarks[i] + d_l = d_landmarks[i] + if s_l[0] < 5 or s_l[1] < 5 or s_l[0] >= res-5 or s_l[1] >= res-5 or \ + d_l[0] < 5 or d_l[1] < 5 or d_l[0] >= res-5 or d_l[1] >= res-5: + idxs.remove(i) + #remove landmarks that close to each other in 5 dist + for landmarks in [s_landmarks, d_landmarks]: + for i in idxs[:]: + s_l = landmarks[i] + for j in idxs[:]: + if i == j: + 
continue + s_l_2 = landmarks[j] + diff_l = np.abs(s_l - s_l_2) + if np.sqrt(diff_l.dot(diff_l)) < 5: + idxs.remove(i) + break + s_landmarks = s_landmarks[idxs] + d_landmarks = d_landmarks[idxs] + s_landmarks = np.concatenate ( [s_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) + d_landmarks = np.concatenate ( [d_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] ) + img = imagelib.morph_by_points (sample_bgr, s_landmarks, d_landmarks) + cur_sample = close_sample +else: + """ diff --git a/samplelib/__init__.py b/samplelib/__init__.py index ceefca2..d865394 100644 --- a/samplelib/__init__.py +++ b/samplelib/__init__.py @@ -1,8 +1,8 @@ -from .Sample import Sample -from .Sample import SampleType -from .SampleLoader import SampleLoader -from .SampleProcessor import SampleProcessor -from .SampleGeneratorBase import SampleGeneratorBase -from .SampleGeneratorFace import SampleGeneratorFace -from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal -from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal +from .Sample import Sample +from .Sample import SampleType +from .SampleLoader import SampleLoader +from .SampleProcessor import SampleProcessor +from .SampleGeneratorBase import SampleGeneratorBase +from .SampleGeneratorFace import SampleGeneratorFace +from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal +from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal diff --git a/utils/DFLJPG.py b/utils/DFLJPG.py index 7be82a6..c06e96a 100644 --- a/utils/DFLJPG.py +++ b/utils/DFLJPG.py @@ -1,303 +1,303 @@ -import pickle -import struct - -import cv2 -import numpy as np - -from facelib import FaceType -from imagelib import IEPolys -from utils.struct_utils import * -from interact import interact as io - -class DFLJPG(object): - def __init__(self): - self.data = b"" - self.length = 0 - self.chunks = [] - self.dfl_dict = None - self.shape = (0,0,0) - - @staticmethod - def load_raw(filename): - try: - with open(filename, "rb") as f: - data = f.read() - except: - raise FileNotFoundError(filename) - - try: - inst = DFLJPG() - inst.data = data - inst.length = len(data) - inst_length = inst.length - chunks = [] - data_counter = 0 - while data_counter < inst_length: - chunk_m_l, chunk_m_h = struct.unpack ("BB", data[data_counter:data_counter+2]) - data_counter += 2 - - if chunk_m_l != 0xFF: - raise ValueError("No Valid JPG info") - - chunk_name = None - chunk_size = None - chunk_data = None - chunk_ex_data = None - is_unk_chunk = False - - if chunk_m_h & 0xF0 == 0xD0: - n = chunk_m_h & 0x0F - - if n >= 0 and n <= 7: - chunk_name = "RST%d" % (n) - chunk_size = 0 - elif n == 0x8: - chunk_name = "SOI" - chunk_size = 0 - if len(chunks) != 0: - raise Exception("") - elif n == 0x9: - chunk_name = "EOI" - chunk_size = 0 - elif n == 0xA: - chunk_name = "SOS" - elif n == 0xB: - chunk_name = "DQT" - elif n == 0xD: - chunk_name = "DRI" - chunk_size = 2 - else: - is_unk_chunk = True - elif chunk_m_h & 0xF0 == 0xC0: - n = chunk_m_h & 0x0F - if n == 0: - chunk_name = "SOF0" - elif n == 2: - chunk_name = "SOF2" - elif n == 4: - chunk_name = "DHT" - else: - is_unk_chunk = True - elif chunk_m_h & 0xF0 == 0xE0: - n = chunk_m_h & 0x0F - chunk_name = "APP%d" % (n) - else: - is_unk_chunk = True - - if is_unk_chunk: - raise ValueError("Unknown chunk %X" % (chunk_m_h) ) - - if chunk_size == None: #variable size - chunk_size, = 
struct.unpack (">H", data[data_counter:data_counter+2]) - chunk_size -= 2 - data_counter += 2 - - if chunk_size > 0: - chunk_data = data[data_counter:data_counter+chunk_size] - data_counter += chunk_size - - if chunk_name == "SOS": - c = data_counter - while c < inst_length and (data[c] != 0xFF or data[c+1] != 0xD9): - c += 1 - - chunk_ex_data = data[data_counter:c] - data_counter = c - - chunks.append ({'name' : chunk_name, - 'm_h' : chunk_m_h, - 'data' : chunk_data, - 'ex_data' : chunk_ex_data, - }) - inst.chunks = chunks - - return inst - except Exception as e: - raise Exception ("Corrupted JPG file: %s" % (str(e))) - - @staticmethod - def load(filename): - try: - inst = DFLJPG.load_raw (filename) - inst.dfl_dict = None - - for chunk in inst.chunks: - if chunk['name'] == 'APP0': - d, c = chunk['data'], 0 - c, id, _ = struct_unpack (d, c, "=4sB") - - if id == b"JFIF": - c, ver_major, ver_minor, units, Xdensity, Ydensity, Xthumbnail, Ythumbnail = struct_unpack (d, c, "=BBBHHBB") - #if units == 0: - # inst.shape = (Ydensity, Xdensity, 3) - else: - raise Exception("Unknown jpeg ID: %s" % (id) ) - elif chunk['name'] == 'SOF0' or chunk['name'] == 'SOF2': - d, c = chunk['data'], 0 - c, precision, height, width = struct_unpack (d, c, ">BHH") - inst.shape = (height, width, 3) - - elif chunk['name'] == 'APP15': - if type(chunk['data']) == bytes: - inst.dfl_dict = pickle.loads(chunk['data']) - - if (inst.dfl_dict is not None): - if 'face_type' not in inst.dfl_dict: - inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) - - if 'fanseg_mask' in inst.dfl_dict: - fanseg_mask = inst.dfl_dict['fanseg_mask'] - if fanseg_mask is not None: - numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) - inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) - - if inst.dfl_dict == None: - return None - - return inst - except Exception as e: - print (e) - return None - - @staticmethod - def embed_data(filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - - if fanseg_mask is not None: - fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) - - ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) - - if ret and len(buf) < 60000: - fanseg_mask = buf - else: - io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) - fanseg_mask = None - - inst = DFLJPG.load_raw (filename) - inst.setDFLDictData ({ - 'face_type': face_type, - 'landmarks': landmarks, - 'ie_polys' : ie_polys.dump() if ie_polys is not None else None, - 'source_filename': source_filename, - 'source_rect': source_rect, - 'source_landmarks': source_landmarks, - 'image_to_face_mat': image_to_face_mat, - 'fanseg_mask' : fanseg_mask, - 'pitch_yaw_roll' : pitch_yaw_roll - }) - - try: - with open(filename, "wb") as f: - f.write ( inst.dump() ) - except: - raise Exception( 'cannot save %s' % (filename) ) - - def embed_and_set(self, filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - if face_type is None: face_type = self.get_face_type() - if landmarks is None: landmarks = self.get_landmarks() - if ie_polys is None: ie_polys = self.get_ie_polys() - if source_filename is None: source_filename = self.get_source_filename() - if source_rect is 
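Taken together, load, embed_data and embed_and_set form a read-modify-write cycle over the DFL metadata kept in the JPEG's APP15 segment. A hypothetical round trip (the filename is a placeholder):

    from utils.DFLJPG import DFLJPG

    dfl = DFLJPG.load("aligned/00001.jpg")   # returns None on any failure
    if dfl is not None:
        lmrks = dfl.get_landmarks()          # 68x2 array
        # ... adjust lmrks here ...
        # Rewrites APP15; fields not passed are kept from the loaded instance.
        dfl.embed_and_set("aligned/00001.jpg", landmarks=lmrks)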
None: source_rect = self.get_source_rect() - if source_landmarks is None: source_landmarks = self.get_source_landmarks() - if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() - if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() - if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() - DFLJPG.embed_data (filename, face_type=face_type, - landmarks=landmarks, - ie_polys=ie_polys, - source_filename=source_filename, - source_rect=source_rect, - source_landmarks=source_landmarks, - image_to_face_mat=image_to_face_mat, - fanseg_mask=fanseg_mask, - pitch_yaw_roll=pitch_yaw_roll) - def remove_fanseg_mask(self): - self.dfl_dict['fanseg_mask'] = None - - def dump(self): - data = b"" - - for chunk in self.chunks: - data += struct.pack ("BB", 0xFF, chunk['m_h'] ) - chunk_data = chunk['data'] - if chunk_data is not None: - data += struct.pack (">H", len(chunk_data)+2 ) - data += chunk_data - - chunk_ex_data = chunk['ex_data'] - if chunk_ex_data is not None: - data += chunk_ex_data - - return data - - def get_shape(self): - return self.shape - - def get_height(self): - for chunk in self.chunks: - if type(chunk) == IHDR: - return chunk.height - return 0 - - def getDFLDictData(self): - return self.dfl_dict - - def setDFLDictData (self, dict_data=None): - self.dfl_dict = dict_data - - for chunk in self.chunks: - if chunk['name'] == 'APP15': - self.chunks.remove(chunk) - break - - last_app_chunk = 0 - for i, chunk in enumerate (self.chunks): - if chunk['m_h'] & 0xF0 == 0xE0: - last_app_chunk = i - - dflchunk = {'name' : 'APP15', - 'm_h' : 0xEF, - 'data' : pickle.dumps(dict_data), - 'ex_data' : None, - } - self.chunks.insert (last_app_chunk+1, dflchunk) - - def get_face_type(self): return self.dfl_dict['face_type'] - def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) - def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) - def get_source_filename(self): return self.dfl_dict['source_filename'] - def get_source_rect(self): return self.dfl_dict['source_rect'] - def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) - def get_image_to_face_mat(self): - mat = self.dfl_dict.get ('image_to_face_mat', None) - if mat is not None: - return np.array (mat) - return None - def get_fanseg_mask(self): - fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) - if fanseg_mask is not None: - return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] - return None - def get_pitch_yaw_roll(self): - return self.dfl_dict.get ('pitch_yaw_roll', None) - +import pickle +import struct + +import cv2 +import numpy as np + +from facelib import FaceType +from imagelib import IEPolys +from utils.struct_utils import * +from interact import interact as io + +class DFLJPG(object): + def __init__(self): + self.data = b"" + self.length = 0 + self.chunks = [] + self.dfl_dict = None + self.shape = (0,0,0) + + @staticmethod + def load_raw(filename): + try: + with open(filename, "rb") as f: + data = f.read() + except: + raise FileNotFoundError(filename) + + try: + inst = DFLJPG() + inst.data = data + inst.length = len(data) + inst_length = inst.length + chunks = [] + data_counter = 0 + while data_counter < inst_length: + chunk_m_l, chunk_m_h = struct.unpack ("BB", data[data_counter:data_counter+2]) + data_counter += 2 + + if chunk_m_l != 0xFF: + raise ValueError("No Valid JPG info") + + chunk_name = None + chunk_size = None + chunk_data = None + chunk_ex_data = None + is_unk_chunk = False + + if 
chunk_m_h & 0xF0 == 0xD0: + n = chunk_m_h & 0x0F + + if n >= 0 and n <= 7: + chunk_name = "RST%d" % (n) + chunk_size = 0 + elif n == 0x8: + chunk_name = "SOI" + chunk_size = 0 + if len(chunks) != 0: + raise Exception("") + elif n == 0x9: + chunk_name = "EOI" + chunk_size = 0 + elif n == 0xA: + chunk_name = "SOS" + elif n == 0xB: + chunk_name = "DQT" + elif n == 0xD: + chunk_name = "DRI" + chunk_size = 2 + else: + is_unk_chunk = True + elif chunk_m_h & 0xF0 == 0xC0: + n = chunk_m_h & 0x0F + if n == 0: + chunk_name = "SOF0" + elif n == 2: + chunk_name = "SOF2" + elif n == 4: + chunk_name = "DHT" + else: + is_unk_chunk = True + elif chunk_m_h & 0xF0 == 0xE0: + n = chunk_m_h & 0x0F + chunk_name = "APP%d" % (n) + else: + is_unk_chunk = True + + if is_unk_chunk: + raise ValueError("Unknown chunk %X" % (chunk_m_h) ) + + if chunk_size == None: #variable size + chunk_size, = struct.unpack (">H", data[data_counter:data_counter+2]) + chunk_size -= 2 + data_counter += 2 + + if chunk_size > 0: + chunk_data = data[data_counter:data_counter+chunk_size] + data_counter += chunk_size + + if chunk_name == "SOS": + c = data_counter + while c < inst_length and (data[c] != 0xFF or data[c+1] != 0xD9): + c += 1 + + chunk_ex_data = data[data_counter:c] + data_counter = c + + chunks.append ({'name' : chunk_name, + 'm_h' : chunk_m_h, + 'data' : chunk_data, + 'ex_data' : chunk_ex_data, + }) + inst.chunks = chunks + + return inst + except Exception as e: + raise Exception ("Corrupted JPG file: %s" % (str(e))) + + @staticmethod + def load(filename): + try: + inst = DFLJPG.load_raw (filename) + inst.dfl_dict = None + + for chunk in inst.chunks: + if chunk['name'] == 'APP0': + d, c = chunk['data'], 0 + c, id, _ = struct_unpack (d, c, "=4sB") + + if id == b"JFIF": + c, ver_major, ver_minor, units, Xdensity, Ydensity, Xthumbnail, Ythumbnail = struct_unpack (d, c, "=BBBHHBB") + #if units == 0: + # inst.shape = (Ydensity, Xdensity, 3) + else: + raise Exception("Unknown jpeg ID: %s" % (id) ) + elif chunk['name'] == 'SOF0' or chunk['name'] == 'SOF2': + d, c = chunk['data'], 0 + c, precision, height, width = struct_unpack (d, c, ">BHH") + inst.shape = (height, width, 3) + + elif chunk['name'] == 'APP15': + if type(chunk['data']) == bytes: + inst.dfl_dict = pickle.loads(chunk['data']) + + if (inst.dfl_dict is not None): + if 'face_type' not in inst.dfl_dict: + inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) + + if 'fanseg_mask' in inst.dfl_dict: + fanseg_mask = inst.dfl_dict['fanseg_mask'] + if fanseg_mask is not None: + numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) + inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) + + if inst.dfl_dict == None: + return None + + return inst + except Exception as e: + print (e) + return None + + @staticmethod + def embed_data(filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + + if fanseg_mask is not None: + fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) + + ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) + + if ret and len(buf) < 60000: + fanseg_mask = buf + else: + io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) + fanseg_mask = None + + inst = DFLJPG.load_raw (filename) + inst.setDFLDictData ({ + 'face_type': face_type, + 'landmarks': landmarks, + 'ie_polys' : ie_polys.dump() if ie_polys is 
not None else None, + 'source_filename': source_filename, + 'source_rect': source_rect, + 'source_landmarks': source_landmarks, + 'image_to_face_mat': image_to_face_mat, + 'fanseg_mask' : fanseg_mask, + 'pitch_yaw_roll' : pitch_yaw_roll + }) + + try: + with open(filename, "wb") as f: + f.write ( inst.dump() ) + except: + raise Exception( 'cannot save %s' % (filename) ) + + def embed_and_set(self, filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + if face_type is None: face_type = self.get_face_type() + if landmarks is None: landmarks = self.get_landmarks() + if ie_polys is None: ie_polys = self.get_ie_polys() + if source_filename is None: source_filename = self.get_source_filename() + if source_rect is None: source_rect = self.get_source_rect() + if source_landmarks is None: source_landmarks = self.get_source_landmarks() + if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() + if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() + if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() + DFLJPG.embed_data (filename, face_type=face_type, + landmarks=landmarks, + ie_polys=ie_polys, + source_filename=source_filename, + source_rect=source_rect, + source_landmarks=source_landmarks, + image_to_face_mat=image_to_face_mat, + fanseg_mask=fanseg_mask, + pitch_yaw_roll=pitch_yaw_roll) + def remove_fanseg_mask(self): + self.dfl_dict['fanseg_mask'] = None + + def dump(self): + data = b"" + + for chunk in self.chunks: + data += struct.pack ("BB", 0xFF, chunk['m_h'] ) + chunk_data = chunk['data'] + if chunk_data is not None: + data += struct.pack (">H", len(chunk_data)+2 ) + data += chunk_data + + chunk_ex_data = chunk['ex_data'] + if chunk_ex_data is not None: + data += chunk_ex_data + + return data + + def get_shape(self): + return self.shape + + def get_height(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + return chunk.height + return 0 + + def getDFLDictData(self): + return self.dfl_dict + + def setDFLDictData (self, dict_data=None): + self.dfl_dict = dict_data + + for chunk in self.chunks: + if chunk['name'] == 'APP15': + self.chunks.remove(chunk) + break + + last_app_chunk = 0 + for i, chunk in enumerate (self.chunks): + if chunk['m_h'] & 0xF0 == 0xE0: + last_app_chunk = i + + dflchunk = {'name' : 'APP15', + 'm_h' : 0xEF, + 'data' : pickle.dumps(dict_data), + 'ex_data' : None, + } + self.chunks.insert (last_app_chunk+1, dflchunk) + + def get_face_type(self): return self.dfl_dict['face_type'] + def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) + def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) + def get_source_filename(self): return self.dfl_dict['source_filename'] + def get_source_rect(self): return self.dfl_dict['source_rect'] + def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) + def get_image_to_face_mat(self): + mat = self.dfl_dict.get ('image_to_face_mat', None) + if mat is not None: + return np.array (mat) + return None + def get_fanseg_mask(self): + fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) + if fanseg_mask is not None: + return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] + return None + def get_pitch_yaw_roll(self): + return self.dfl_dict.get ('pitch_yaw_roll', None) + diff --git a/utils/DFLPNG.py b/utils/DFLPNG.py index daff714..d1fb76e 
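Before the DFLPNG hunks: a PNG chunk is a 4-byte big-endian length, a 4-byte ASCII type, the payload, and a CRC-32 over type plus payload, which is exactly the layout Chunk.load and Chunk.dump below implement. A minimal standalone reader over that layout (a sketch, not the patched class):

    import struct, zlib

    def read_chunk(data, offset):
        # One PNG chunk: !I length, 4s type, payload, !I CRC over type+payload.
        length, raw_name = struct.unpack("!I4s", data[offset:offset + 8])
        payload = data[offset + 8:offset + 8 + length]
        crc, = struct.unpack("!I", data[offset + 8 + length:offset + 12 + length])
        assert crc == zlib.crc32(raw_name + payload), "CRC mismatch"
        return raw_name.decode("ascii"), payload, offset + 12 + length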
100644 --- a/utils/DFLPNG.py +++ b/utils/DFLPNG.py @@ -1,410 +1,410 @@ -import pickle -import string -import struct -import zlib - -import cv2 -import numpy as np - -from facelib import FaceType -from imagelib import IEPolys - -PNG_HEADER = b"\x89PNG\r\n\x1a\n" - -class Chunk(object): - def __init__(self, name=None, data=None): - self.length = 0 - self.crc = 0 - self.name = name if name else "noNe" - self.data = data if data else b"" - - @classmethod - def load(cls, data): - """Load a chunk including header and footer""" - inst = cls() - if len(data) < 12: - msg = "Chunk-data too small" - raise ValueError(msg) - - # chunk header & data - (inst.length, raw_name) = struct.unpack("!I4s", data[0:8]) - inst.data = data[8:-4] - inst.verify_length() - inst.name = raw_name.decode("ascii") - inst.verify_name() - - # chunk crc - inst.crc = struct.unpack("!I", data[8+inst.length:8+inst.length+4])[0] - inst.verify_crc() - - return inst - - def dump(self, auto_crc=True, auto_length=True): - """Return the chunk including header and footer""" - if auto_length: self.update_length() - if auto_crc: self.update_crc() - self.verify_name() - return struct.pack("!I", self.length) + self.get_raw_name() + self.data + struct.pack("!I", self.crc) - - def verify_length(self): - if len(self.data) != self.length: - msg = "Data length ({}) does not match length in chunk header ({})".format(len(self.data), self.length) - raise ValueError(msg) - return True - - def verify_name(self): - for c in self.name: - if c not in string.ascii_letters: - msg = "Invalid character in chunk name: {}".format(repr(self.name)) - raise ValueError(msg) - return True - - def verify_crc(self): - calculated_crc = self.get_crc() - if self.crc != calculated_crc: - msg = "CRC mismatch: {:08X} (header), {:08X} (calculated)".format(self.crc, calculated_crc) - raise ValueError(msg) - return True - - def update_length(self): - self.length = len(self.data) - - def update_crc(self): - self.crc = self.get_crc() - - def get_crc(self): - return zlib.crc32(self.get_raw_name() + self.data) - - def get_raw_name(self): - return self.name if isinstance(self.name, bytes) else self.name.encode("ascii") - - # name helper methods - - def ancillary(self, set=None): - """Set and get ancillary=True/critical=False bit""" - if set is True: - self.name[0] = self.name[0].lower() - elif set is False: - self.name[0] = self.name[0].upper() - return self.name[0].islower() - - def private(self, set=None): - """Set and get private=True/public=False bit""" - if set is True: - self.name[1] = self.name[1].lower() - elif set is False: - self.name[1] = self.name[1].upper() - return self.name[1].islower() - - def reserved(self, set=None): - """Set and get reserved_valid=True/invalid=False bit""" - if set is True: - self.name[2] = self.name[2].upper() - elif set is False: - self.name[2] = self.name[2].lower() - return self.name[2].isupper() - - def safe_to_copy(self, set=None): - """Set and get save_to_copy=True/unsafe=False bit""" - if set is True: - self.name[3] = self.name[3].lower() - elif set is False: - self.name[3] = self.name[3].upper() - return self.name[3].islower() - - def __str__(self): - return "".format(**self.__dict__) - -class IHDR(Chunk): - """IHDR Chunk - width, height, bit_depth, color_type, compression_method, - filter_method, interlace_method contain the data extracted - from the chunk. Modify those and use and build() to recreate - the chunk. 
Valid values for bit_depth depend on the color_type - and can be looked up in color_types or in the PNG specification - - See: - http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR - """ - # color types with name & allowed bit depths - COLOR_TYPE_GRAY = 0 - COLOR_TYPE_RGB = 2 - COLOR_TYPE_PLTE = 3 - COLOR_TYPE_GRAYA = 4 - COLOR_TYPE_RGBA = 6 - color_types = { - COLOR_TYPE_GRAY: ("Grayscale", (1,2,4,8,16)), - COLOR_TYPE_RGB: ("RGB", (8,16)), - COLOR_TYPE_PLTE: ("Palette", (1,2,4,8)), - COLOR_TYPE_GRAYA: ("Greyscale+Alpha", (8,16)), - COLOR_TYPE_RGBA: ("RGBA", (8,16)), - } - - def __init__(self, width=0, height=0, bit_depth=8, color_type=2, \ - compression_method=0, filter_method=0, interlace_method=0): - self.width = width - self.height = height - self.bit_depth = bit_depth - self.color_type = color_type - self.compression_method = compression_method - self.filter_method = filter_method - self.interlace_method = interlace_method - super().__init__("IHDR") - - @classmethod - def load(cls, data): - inst = super().load(data) - fields = struct.unpack("!IIBBBBB", inst.data) - inst.width = fields[0] - inst.height = fields[1] - inst.bit_depth = fields[2] # per channel - inst.color_type = fields[3] # see specs - inst.compression_method = fields[4] # always 0(=deflate/inflate) - inst.filter_method = fields[5] # always 0(=adaptive filtering with 5 methods) - inst.interlace_method = fields[6] # 0(=no interlace) or 1(=Adam7 interlace) - return inst - - def dump(self): - self.data = struct.pack("!IIBBBBB", \ - self.width, self.height, self.bit_depth, self.color_type, \ - self.compression_method, self.filter_method, self.interlace_method) - return super().dump() - - def __str__(self): - return "" \ - .format(self.color_types[self.color_type][0], **self.__dict__) - -class IEND(Chunk): - def __init__(self): - super().__init__("IEND") - - def dump(self): - if len(self.data) != 0: - msg = "IEND has data which is not allowed" - raise ValueError(msg) - if self.length != 0: - msg = "IEND data lenght is not 0 which is not allowed" - raise ValueError(msg) - return super().dump() - - def __str__(self): - return "".format(**self.__dict__) - -class DFLChunk(Chunk): - def __init__(self, dict_data=None): - super().__init__("fcWp") - self.dict_data = dict_data - - def setDictData(self, dict_data): - self.dict_data = dict_data - - def getDictData(self): - return self.dict_data - - @classmethod - def load(cls, data): - inst = super().load(data) - inst.dict_data = pickle.loads( inst.data ) - return inst - - def dump(self): - self.data = pickle.dumps (self.dict_data) - return super().dump() - -chunk_map = { - b"IHDR": IHDR, - b"fcWp": DFLChunk, - b"IEND": IEND -} - -class DFLPNG(object): - def __init__(self): - self.data = b"" - self.length = 0 - self.chunks = [] - self.dfl_dict = None - - @staticmethod - def load_raw(filename): - try: - with open(filename, "rb") as f: - data = f.read() - except: - raise FileNotFoundError(filename) - - inst = DFLPNG() - inst.data = data - inst.length = len(data) - - if data[0:8] != PNG_HEADER: - msg = "No Valid PNG header" - raise ValueError(msg) - - chunk_start = 8 - while chunk_start < inst.length: - (chunk_length, chunk_name) = struct.unpack("!I4s", data[chunk_start:chunk_start+8]) - chunk_end = chunk_start + chunk_length + 12 - - chunk = chunk_map.get(chunk_name, Chunk).load(data[chunk_start:chunk_end]) - inst.chunks.append(chunk) - chunk_start = chunk_end - - return inst - - @staticmethod - def load(filename): - try: - inst = DFLPNG.load_raw (filename) - inst.dfl_dict = 
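Two notes on the classes above. The custom chunk name "fcWp" encodes its properties in letter case per the PNG spec: lowercase f (ancillary), lowercase c (private), uppercase W (reserved bit valid), lowercase p (safe to copy). And the IHDR payload is a fixed 13-byte record; for example, a 256x256 8-bit RGBA header packs and unpacks as:

    import struct
    ihdr = struct.pack("!IIBBBBB", 256, 256, 8, 6, 0, 0, 0)   # color_type 6 = RGBA
    width, height, bit_depth, color_type, comp, filt, interlace = \
        struct.unpack("!IIBBBBB", ihdr)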
inst.getDFLDictData() - - if inst.dfl_dict is not None: - if 'face_type' not in inst.dfl_dict: - inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) - - if 'fanseg_mask' in inst.dfl_dict: - fanseg_mask = inst.dfl_dict['fanseg_mask'] - if fanseg_mask is not None: - numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) - inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) - - if inst.dfl_dict == None: - return None - - return inst - except Exception as e: - print(e) - return None - - @staticmethod - def embed_data(filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - - if fanseg_mask is not None: - fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) - - ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) - - if ret and len(buf) < 60000: - fanseg_mask = buf - else: - io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) - fanseg_mask = None - - inst = DFLPNG.load_raw (filename) - inst.setDFLDictData ({ - 'face_type': face_type, - 'landmarks': landmarks, - 'ie_polys' : ie_polys.dump() if ie_polys is not None else None, - 'source_filename': source_filename, - 'source_rect': source_rect, - 'source_landmarks': source_landmarks, - 'image_to_face_mat':image_to_face_mat, - 'fanseg_mask' : fanseg_mask, - 'pitch_yaw_roll' : pitch_yaw_roll - }) - - try: - with open(filename, "wb") as f: - f.write ( inst.dump() ) - except: - raise Exception( 'cannot save %s' % (filename) ) - - def embed_and_set(self, filename, face_type=None, - landmarks=None, - ie_polys=None, - source_filename=None, - source_rect=None, - source_landmarks=None, - image_to_face_mat=None, - fanseg_mask=None, - pitch_yaw_roll=None, - **kwargs - ): - if face_type is None: face_type = self.get_face_type() - if landmarks is None: landmarks = self.get_landmarks() - if ie_polys is None: ie_polys = self.get_ie_polys() - if source_filename is None: source_filename = self.get_source_filename() - if source_rect is None: source_rect = self.get_source_rect() - if source_landmarks is None: source_landmarks = self.get_source_landmarks() - if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() - if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() - if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() - DFLPNG.embed_data (filename, face_type=face_type, - landmarks=landmarks, - ie_polys=ie_polys, - source_filename=source_filename, - source_rect=source_rect, - source_landmarks=source_landmarks, - image_to_face_mat=image_to_face_mat, - fanseg_mask=fanseg_mask, - pitch_yaw_roll=pitch_yaw_roll) - - def remove_fanseg_mask(self): - self.dfl_dict['fanseg_mask'] = None - - def dump(self): - data = PNG_HEADER - for chunk in self.chunks: - data += chunk.dump() - return data - - def get_shape(self): - for chunk in self.chunks: - if type(chunk) == IHDR: - c = 3 if chunk.color_type == IHDR.COLOR_TYPE_RGB else 4 - w = chunk.width - h = chunk.height - return (h,w,c) - return (0,0,0) - - def get_height(self): - for chunk in self.chunks: - if type(chunk) == IHDR: - return chunk.height - return 0 - - def getDFLDictData(self): - for chunk in self.chunks: - if type(chunk) == DFLChunk: - return chunk.getDictData() - return None - - def setDFLDictData (self, dict_data=None): - for chunk in self.chunks: - if type(chunk) == DFLChunk: - 
self.chunks.remove(chunk) - break - - if not dict_data is None: - chunk = DFLChunk(dict_data) - self.chunks.insert(-1, chunk) - - def get_face_type(self): return self.dfl_dict['face_type'] - def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) - def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) - def get_source_filename(self): return self.dfl_dict['source_filename'] - def get_source_rect(self): return self.dfl_dict['source_rect'] - def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) - def get_image_to_face_mat(self): - mat = self.dfl_dict.get ('image_to_face_mat', None) - if mat is not None: - return np.array (mat) - return None - def get_fanseg_mask(self): - fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) - if fanseg_mask is not None: - return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] - return None - def get_pitch_yaw_roll(self): - return self.dfl_dict.get ('pitch_yaw_roll', None) - def __str__(self): - return "".format(len(self.chunks), **self.__dict__) +import pickle +import string +import struct +import zlib + +import cv2 +import numpy as np + +from facelib import FaceType +from imagelib import IEPolys + +PNG_HEADER = b"\x89PNG\r\n\x1a\n" + +class Chunk(object): + def __init__(self, name=None, data=None): + self.length = 0 + self.crc = 0 + self.name = name if name else "noNe" + self.data = data if data else b"" + + @classmethod + def load(cls, data): + """Load a chunk including header and footer""" + inst = cls() + if len(data) < 12: + msg = "Chunk-data too small" + raise ValueError(msg) + + # chunk header & data + (inst.length, raw_name) = struct.unpack("!I4s", data[0:8]) + inst.data = data[8:-4] + inst.verify_length() + inst.name = raw_name.decode("ascii") + inst.verify_name() + + # chunk crc + inst.crc = struct.unpack("!I", data[8+inst.length:8+inst.length+4])[0] + inst.verify_crc() + + return inst + + def dump(self, auto_crc=True, auto_length=True): + """Return the chunk including header and footer""" + if auto_length: self.update_length() + if auto_crc: self.update_crc() + self.verify_name() + return struct.pack("!I", self.length) + self.get_raw_name() + self.data + struct.pack("!I", self.crc) + + def verify_length(self): + if len(self.data) != self.length: + msg = "Data length ({}) does not match length in chunk header ({})".format(len(self.data), self.length) + raise ValueError(msg) + return True + + def verify_name(self): + for c in self.name: + if c not in string.ascii_letters: + msg = "Invalid character in chunk name: {}".format(repr(self.name)) + raise ValueError(msg) + return True + + def verify_crc(self): + calculated_crc = self.get_crc() + if self.crc != calculated_crc: + msg = "CRC mismatch: {:08X} (header), {:08X} (calculated)".format(self.crc, calculated_crc) + raise ValueError(msg) + return True + + def update_length(self): + self.length = len(self.data) + + def update_crc(self): + self.crc = self.get_crc() + + def get_crc(self): + return zlib.crc32(self.get_raw_name() + self.data) + + def get_raw_name(self): + return self.name if isinstance(self.name, bytes) else self.name.encode("ascii") + + # name helper methods + + def ancillary(self, set=None): + """Set and get ancillary=True/critical=False bit""" + if set is True: + self.name[0] = self.name[0].lower() + elif set is False: + self.name[0] = self.name[0].upper() + return self.name[0].islower() + + def private(self, set=None): + """Set and get private=True/public=False bit""" + if set is True: + 
self.name[1] = self.name[1].lower() + elif set is False: + self.name[1] = self.name[1].upper() + return self.name[1].islower() + + def reserved(self, set=None): + """Set and get reserved_valid=True/invalid=False bit""" + if set is True: + self.name[2] = self.name[2].upper() + elif set is False: + self.name[2] = self.name[2].lower() + return self.name[2].isupper() + + def safe_to_copy(self, set=None): + """Set and get save_to_copy=True/unsafe=False bit""" + if set is True: + self.name[3] = self.name[3].lower() + elif set is False: + self.name[3] = self.name[3].upper() + return self.name[3].islower() + + def __str__(self): + return "".format(**self.__dict__) + +class IHDR(Chunk): + """IHDR Chunk + width, height, bit_depth, color_type, compression_method, + filter_method, interlace_method contain the data extracted + from the chunk. Modify those and use and build() to recreate + the chunk. Valid values for bit_depth depend on the color_type + and can be looked up in color_types or in the PNG specification + + See: + http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR + """ + # color types with name & allowed bit depths + COLOR_TYPE_GRAY = 0 + COLOR_TYPE_RGB = 2 + COLOR_TYPE_PLTE = 3 + COLOR_TYPE_GRAYA = 4 + COLOR_TYPE_RGBA = 6 + color_types = { + COLOR_TYPE_GRAY: ("Grayscale", (1,2,4,8,16)), + COLOR_TYPE_RGB: ("RGB", (8,16)), + COLOR_TYPE_PLTE: ("Palette", (1,2,4,8)), + COLOR_TYPE_GRAYA: ("Greyscale+Alpha", (8,16)), + COLOR_TYPE_RGBA: ("RGBA", (8,16)), + } + + def __init__(self, width=0, height=0, bit_depth=8, color_type=2, \ + compression_method=0, filter_method=0, interlace_method=0): + self.width = width + self.height = height + self.bit_depth = bit_depth + self.color_type = color_type + self.compression_method = compression_method + self.filter_method = filter_method + self.interlace_method = interlace_method + super().__init__("IHDR") + + @classmethod + def load(cls, data): + inst = super().load(data) + fields = struct.unpack("!IIBBBBB", inst.data) + inst.width = fields[0] + inst.height = fields[1] + inst.bit_depth = fields[2] # per channel + inst.color_type = fields[3] # see specs + inst.compression_method = fields[4] # always 0(=deflate/inflate) + inst.filter_method = fields[5] # always 0(=adaptive filtering with 5 methods) + inst.interlace_method = fields[6] # 0(=no interlace) or 1(=Adam7 interlace) + return inst + + def dump(self): + self.data = struct.pack("!IIBBBBB", \ + self.width, self.height, self.bit_depth, self.color_type, \ + self.compression_method, self.filter_method, self.interlace_method) + return super().dump() + + def __str__(self): + return "" \ + .format(self.color_types[self.color_type][0], **self.__dict__) + +class IEND(Chunk): + def __init__(self): + super().__init__("IEND") + + def dump(self): + if len(self.data) != 0: + msg = "IEND has data which is not allowed" + raise ValueError(msg) + if self.length != 0: + msg = "IEND data lenght is not 0 which is not allowed" + raise ValueError(msg) + return super().dump() + + def __str__(self): + return "".format(**self.__dict__) + +class DFLChunk(Chunk): + def __init__(self, dict_data=None): + super().__init__("fcWp") + self.dict_data = dict_data + + def setDictData(self, dict_data): + self.dict_data = dict_data + + def getDictData(self): + return self.dict_data + + @classmethod + def load(cls, data): + inst = super().load(data) + inst.dict_data = pickle.loads( inst.data ) + return inst + + def dump(self): + self.data = pickle.dumps (self.dict_data) + return super().dump() + +chunk_map = { + b"IHDR": IHDR, + 
b"fcWp": DFLChunk, + b"IEND": IEND +} + +class DFLPNG(object): + def __init__(self): + self.data = b"" + self.length = 0 + self.chunks = [] + self.dfl_dict = None + + @staticmethod + def load_raw(filename): + try: + with open(filename, "rb") as f: + data = f.read() + except: + raise FileNotFoundError(filename) + + inst = DFLPNG() + inst.data = data + inst.length = len(data) + + if data[0:8] != PNG_HEADER: + msg = "No Valid PNG header" + raise ValueError(msg) + + chunk_start = 8 + while chunk_start < inst.length: + (chunk_length, chunk_name) = struct.unpack("!I4s", data[chunk_start:chunk_start+8]) + chunk_end = chunk_start + chunk_length + 12 + + chunk = chunk_map.get(chunk_name, Chunk).load(data[chunk_start:chunk_end]) + inst.chunks.append(chunk) + chunk_start = chunk_end + + return inst + + @staticmethod + def load(filename): + try: + inst = DFLPNG.load_raw (filename) + inst.dfl_dict = inst.getDFLDictData() + + if inst.dfl_dict is not None: + if 'face_type' not in inst.dfl_dict: + inst.dfl_dict['face_type'] = FaceType.toString (FaceType.FULL) + + if 'fanseg_mask' in inst.dfl_dict: + fanseg_mask = inst.dfl_dict['fanseg_mask'] + if fanseg_mask is not None: + numpyarray = np.asarray( inst.dfl_dict['fanseg_mask'], dtype=np.uint8) + inst.dfl_dict['fanseg_mask'] = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) + + if inst.dfl_dict == None: + return None + + return inst + except Exception as e: + print(e) + return None + + @staticmethod + def embed_data(filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + + if fanseg_mask is not None: + fanseg_mask = np.clip ( (fanseg_mask*255).astype(np.uint8), 0, 255 ) + + ret, buf = cv2.imencode( '.jpg', fanseg_mask, [int(cv2.IMWRITE_JPEG_QUALITY), 85] ) + + if ret and len(buf) < 60000: + fanseg_mask = buf + else: + io.log_err("Unable to encode fanseg_mask for %s" % (filename) ) + fanseg_mask = None + + inst = DFLPNG.load_raw (filename) + inst.setDFLDictData ({ + 'face_type': face_type, + 'landmarks': landmarks, + 'ie_polys' : ie_polys.dump() if ie_polys is not None else None, + 'source_filename': source_filename, + 'source_rect': source_rect, + 'source_landmarks': source_landmarks, + 'image_to_face_mat':image_to_face_mat, + 'fanseg_mask' : fanseg_mask, + 'pitch_yaw_roll' : pitch_yaw_roll + }) + + try: + with open(filename, "wb") as f: + f.write ( inst.dump() ) + except: + raise Exception( 'cannot save %s' % (filename) ) + + def embed_and_set(self, filename, face_type=None, + landmarks=None, + ie_polys=None, + source_filename=None, + source_rect=None, + source_landmarks=None, + image_to_face_mat=None, + fanseg_mask=None, + pitch_yaw_roll=None, + **kwargs + ): + if face_type is None: face_type = self.get_face_type() + if landmarks is None: landmarks = self.get_landmarks() + if ie_polys is None: ie_polys = self.get_ie_polys() + if source_filename is None: source_filename = self.get_source_filename() + if source_rect is None: source_rect = self.get_source_rect() + if source_landmarks is None: source_landmarks = self.get_source_landmarks() + if image_to_face_mat is None: image_to_face_mat = self.get_image_to_face_mat() + if fanseg_mask is None: fanseg_mask = self.get_fanseg_mask() + if pitch_yaw_roll is None: pitch_yaw_roll = self.get_pitch_yaw_roll() + DFLPNG.embed_data (filename, face_type=face_type, + landmarks=landmarks, + ie_polys=ie_polys, + source_filename=source_filename, + 
source_rect=source_rect, + source_landmarks=source_landmarks, + image_to_face_mat=image_to_face_mat, + fanseg_mask=fanseg_mask, + pitch_yaw_roll=pitch_yaw_roll) + + def remove_fanseg_mask(self): + self.dfl_dict['fanseg_mask'] = None + + def dump(self): + data = PNG_HEADER + for chunk in self.chunks: + data += chunk.dump() + return data + + def get_shape(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + c = 3 if chunk.color_type == IHDR.COLOR_TYPE_RGB else 4 + w = chunk.width + h = chunk.height + return (h,w,c) + return (0,0,0) + + def get_height(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + return chunk.height + return 0 + + def getDFLDictData(self): + for chunk in self.chunks: + if type(chunk) == DFLChunk: + return chunk.getDictData() + return None + + def setDFLDictData (self, dict_data=None): + for chunk in self.chunks: + if type(chunk) == DFLChunk: + self.chunks.remove(chunk) + break + + if not dict_data is None: + chunk = DFLChunk(dict_data) + self.chunks.insert(-1, chunk) + + def get_face_type(self): return self.dfl_dict['face_type'] + def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] ) + def get_ie_polys(self): return IEPolys.load(self.dfl_dict.get('ie_polys',None)) + def get_source_filename(self): return self.dfl_dict['source_filename'] + def get_source_rect(self): return self.dfl_dict['source_rect'] + def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] ) + def get_image_to_face_mat(self): + mat = self.dfl_dict.get ('image_to_face_mat', None) + if mat is not None: + return np.array (mat) + return None + def get_fanseg_mask(self): + fanseg_mask = self.dfl_dict.get ('fanseg_mask', None) + if fanseg_mask is not None: + return np.clip ( np.array (fanseg_mask) / 255.0, 0.0, 1.0 )[...,np.newaxis] + return None + def get_pitch_yaw_roll(self): + return self.dfl_dict.get ('pitch_yaw_roll', None) + def __str__(self): + return "".format(len(self.chunks), **self.__dict__) diff --git a/utils/Path_utils.py b/utils/Path_utils.py index 09258a2..c9ed448 100644 --- a/utils/Path_utils.py +++ b/utils/Path_utils.py @@ -1,83 +1,83 @@ -from pathlib import Path -from os import scandir - -image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] - -def get_image_paths(dir_path, image_extensions=image_extensions): - dir_path = Path (dir_path) - - result = [] - if dir_path.exists(): - for x in list(scandir(str(dir_path))): - if any([x.name.lower().endswith(ext) for ext in image_extensions]): - result.append(x.path) - return result - -def get_image_unique_filestem_paths(dir_path, verbose_print_func=None): - result = get_image_paths(dir_path) - result_dup = set() - - for f in result[:]: - f_stem = Path(f).stem - if f_stem in result_dup: - result.remove(f) - if verbose_print_func is not None: - verbose_print_func ("Duplicate filenames are not allowed, skipping: %s" % Path(f).name ) - continue - result_dup.add(f_stem) - - return result - -def get_file_paths(dir_path): - dir_path = Path (dir_path) - - result = [] - if dir_path.exists(): - return [ x.path for x in list(scandir(str(dir_path))) if x.is_file() ] - return result - -def get_all_dir_names (dir_path): - dir_path = Path (dir_path) - - result = [] - if dir_path.exists(): - return [ x.name for x in list(scandir(str(dir_path))) if x.is_dir() ] - - return result - -def get_all_dir_names_startswith (dir_path, startswith): - dir_path = Path (dir_path) - startswith = startswith.lower() - - result = [] - if dir_path.exists(): - for x in list(scandir(str(dir_path))): - if 
x.name.lower().startswith(startswith): - result.append ( x.name[len(startswith):] ) - return result - -def get_first_file_by_stem (dir_path, stem, exts=None): - dir_path = Path (dir_path) - stem = stem.lower() - - if dir_path.exists(): - for x in list(scandir(str(dir_path))): - if not x.is_file(): - continue - xp = Path(x.path) - if xp.stem.lower() == stem and (exts is None or xp.suffix.lower() in exts): - return xp - - return None - -def move_all_files (src_dir_path, dst_dir_path): - paths = get_file_paths(src_dir_path) - for p in paths: - p = Path(p) - p.rename ( Path(dst_dir_path) / p.name ) - -def delete_all_files (dir_path): - paths = get_file_paths(dir_path) - for p in paths: - p = Path(p) +from pathlib import Path +from os import scandir + +image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + +def get_image_paths(dir_path, image_extensions=image_extensions): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if any([x.name.lower().endswith(ext) for ext in image_extensions]): + result.append(x.path) + return result + +def get_image_unique_filestem_paths(dir_path, verbose_print_func=None): + result = get_image_paths(dir_path) + result_dup = set() + + for f in result[:]: + f_stem = Path(f).stem + if f_stem in result_dup: + result.remove(f) + if verbose_print_func is not None: + verbose_print_func ("Duplicate filenames are not allowed, skipping: %s" % Path(f).name ) + continue + result_dup.add(f_stem) + + return result + +def get_file_paths(dir_path): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + return [ x.path for x in list(scandir(str(dir_path))) if x.is_file() ] + return result + +def get_all_dir_names (dir_path): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + return [ x.name for x in list(scandir(str(dir_path))) if x.is_dir() ] + + return result + +def get_all_dir_names_startswith (dir_path, startswith): + dir_path = Path (dir_path) + startswith = startswith.lower() + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if x.name.lower().startswith(startswith): + result.append ( x.name[len(startswith):] ) + return result + +def get_first_file_by_stem (dir_path, stem, exts=None): + dir_path = Path (dir_path) + stem = stem.lower() + + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if not x.is_file(): + continue + xp = Path(x.path) + if xp.stem.lower() == stem and (exts is None or xp.suffix.lower() in exts): + return xp + + return None + +def move_all_files (src_dir_path, dst_dir_path): + paths = get_file_paths(src_dir_path) + for p in paths: + p = Path(p) + p.rename ( Path(dst_dir_path) / p.name ) + +def delete_all_files (dir_path): + paths = get_file_paths(dir_path) + for p in paths: + p = Path(p) p.unlink() \ No newline at end of file diff --git a/utils/cv2_utils.py b/utils/cv2_utils.py index 63b8ff6..ff8d82a 100644 --- a/utils/cv2_utils.py +++ b/utils/cv2_utils.py @@ -1,22 +1,22 @@ -import cv2 -import numpy as np -from pathlib import Path - -#allows to open non-english characters path -def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED): - try: - with open(filename, "rb") as stream: - bytes = bytearray(stream.read()) - numpyarray = np.asarray(bytes, dtype=np.uint8) - return cv2.imdecode(numpyarray, flags) - except: - return None - -def cv2_imwrite(filename, img, *args): - ret, buf = cv2.imencode( Path(filename).suffix, img, *args) - if ret == True: - try: - with open(filename, "wb") as stream: - stream.write( buf ) 
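The imdecode/imencode pair above exists because cv2.imread and cv2.imwrite cannot open paths containing non-ASCII characters on Windows; routing the bytes through ordinary Python file objects sidesteps that. Usage is drop-in (the path is illustrative):

    from utils.cv2_utils import cv2_imread, cv2_imwrite

    img = cv2_imread("data_src/кадр_0001.png")   # plain cv2.imread may fail on this path
    if img is not None:
        cv2_imwrite("data_dst/кадр_0001.png", img)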
- except: - pass +import cv2 +import numpy as np +from pathlib import Path + +#allows to open non-english characters path +def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED): + try: + with open(filename, "rb") as stream: + bytes = bytearray(stream.read()) + numpyarray = np.asarray(bytes, dtype=np.uint8) + return cv2.imdecode(numpyarray, flags) + except: + return None + +def cv2_imwrite(filename, img, *args): + ret, buf = cv2.imencode( Path(filename).suffix, img, *args) + if ret == True: + try: + with open(filename, "wb") as stream: + stream.write( buf ) + except: + pass diff --git a/utils/iter_utils.py b/utils/iter_utils.py index 7e54f0b..eb00c55 100644 --- a/utils/iter_utils.py +++ b/utils/iter_utils.py @@ -1,70 +1,70 @@ -import threading -import queue as Queue -import multiprocessing -import time - - -class ThisThreadGenerator(object): - def __init__(self, generator_func, user_param=None): - super().__init__() - self.generator_func = generator_func - self.user_param = user_param - self.initialized = False - - def __iter__(self): - return self - - def __next__(self): - if not self.initialized: - self.initialized = True - self.generator_func = self.generator_func(self.user_param) - - return next(self.generator_func) - -class SubprocessGenerator(object): - def __init__(self, generator_func, user_param=None, prefetch=2): - super().__init__() - self.prefetch = prefetch - self.generator_func = generator_func - self.user_param = user_param - self.sc_queue = multiprocessing.Queue() - self.cs_queue = multiprocessing.Queue() - self.p = None - - def process_func(self, user_param): - self.generator_func = self.generator_func(user_param) - while True: - while self.prefetch > -1: - try: - gen_data = next (self.generator_func) - except StopIteration: - self.cs_queue.put (None) - return - self.cs_queue.put (gen_data) - self.prefetch -= 1 - self.sc_queue.get() - self.prefetch += 1 - - def __iter__(self): - return self - - def __getstate__(self): - self_dict = self.__dict__.copy() - del self_dict['p'] - return self_dict - - def __next__(self): - if self.p == None: - user_param = self.user_param - self.user_param = None - self.p = multiprocessing.Process(target=self.process_func, args=(user_param,) ) - self.p.daemon = True - self.p.start() - - gen_data = self.cs_queue.get() - if gen_data is None: - self.p.terminate() - self.p.join() - raise StopIteration() - self.sc_queue.put (1) - return gen_data +import threading +import queue as Queue +import multiprocessing +import time + + +class ThisThreadGenerator(object): + def __init__(self, generator_func, user_param=None): + super().__init__() + self.generator_func = generator_func + self.user_param = user_param + self.initialized = False + + def __iter__(self): + return self + + def __next__(self): + if not self.initialized: + self.initialized = True + self.generator_func = self.generator_func(self.user_param) + + return next(self.generator_func) + +class SubprocessGenerator(object): + def __init__(self, generator_func, user_param=None, prefetch=2): + super().__init__() + self.prefetch = prefetch + self.generator_func = generator_func + self.user_param = user_param + self.sc_queue = multiprocessing.Queue() + self.cs_queue = multiprocessing.Queue() + self.p = None + + def process_func(self, user_param): + self.generator_func = self.generator_func(user_param) + while True: + while self.prefetch > -1: + try: + gen_data = next (self.generator_func) + except StopIteration: + self.cs_queue.put (None) + return + self.cs_queue.put (gen_data) + self.prefetch -= 1 + 
self.sc_queue.get() + self.prefetch += 1 + + def __iter__(self): + return self + + def __getstate__(self): + self_dict = self.__dict__.copy() + del self_dict['p'] + return self_dict + + def __next__(self): + if self.p == None: + user_param = self.user_param + self.user_param = None + self.p = multiprocessing.Process(target=self.process_func, args=(user_param,) ) + self.p.daemon = True + self.p.start() + + gen_data = self.cs_queue.get() + if gen_data is None: + self.p.terminate() + self.p.join() + raise StopIteration() + self.sc_queue.put (1) + return gen_data diff --git a/utils/os_utils.py b/utils/os_utils.py index 0ce7c75..ff1bd98 100644 --- a/utils/os_utils.py +++ b/utils/os_utils.py @@ -1,25 +1,25 @@ -import os -import sys - -if sys.platform[0:3] == 'win': - from ctypes import windll - from ctypes import wintypes - -def set_process_lowest_prio(): - try: - if sys.platform[0:3] == 'win': - GetCurrentProcess = windll.kernel32.GetCurrentProcess - GetCurrentProcess.restype = wintypes.HANDLE - SetPriorityClass = windll.kernel32.SetPriorityClass - SetPriorityClass.argtypes = (wintypes.HANDLE, wintypes.DWORD) - SetPriorityClass ( GetCurrentProcess(), 0x00000040 ) - elif 'darwin' in sys.platform: - os.nice(10) - elif 'linux' in sys.platform: - os.nice(20) - except: - print("Unable to set lowest process priority") - -def set_process_dpi_aware(): - if sys.platform[0:3] == 'win': - windll.user32.SetProcessDPIAware(True) +import os +import sys + +if sys.platform[0:3] == 'win': + from ctypes import windll + from ctypes import wintypes + +def set_process_lowest_prio(): + try: + if sys.platform[0:3] == 'win': + GetCurrentProcess = windll.kernel32.GetCurrentProcess + GetCurrentProcess.restype = wintypes.HANDLE + SetPriorityClass = windll.kernel32.SetPriorityClass + SetPriorityClass.argtypes = (wintypes.HANDLE, wintypes.DWORD) + SetPriorityClass ( GetCurrentProcess(), 0x00000040 ) + elif 'darwin' in sys.platform: + os.nice(10) + elif 'linux' in sys.platform: + os.nice(20) + except: + print("Unable to set lowest process priority") + +def set_process_dpi_aware(): + if sys.platform[0:3] == 'win': + windll.user32.SetProcessDPIAware(True) diff --git a/utils/pickle_utils.py b/utils/pickle_utils.py index 1f5f9dd..37c4c72 100644 --- a/utils/pickle_utils.py +++ b/utils/pickle_utils.py @@ -1,9 +1,9 @@ -class AntiPickler(): - def __init__(self, obj): - self.obj = obj - - def __getstate__(self): - return dict() - - def __setstate__(self, d): +class AntiPickler(): + def __init__(self, obj): + self.obj = obj + + def __getstate__(self): + return dict() + + def __setstate__(self, d): self.__dict__.update(d) \ No newline at end of file diff --git a/utils/random_utils.py b/utils/random_utils.py index 1891a67..7b3af6e 100644 --- a/utils/random_utils.py +++ b/utils/random_utils.py @@ -1,14 +1,14 @@ -import numpy as np - -def random_normal( size=(1,), trunc_val = 2.5 ): - len = np.array(size).prod() - result = np.empty ( (len,) , dtype=np.float32) - - for i in range (len): - while True: - x = np.random.normal() - if x >= -trunc_val and x <= trunc_val: - break - result[i] = (x / trunc_val) - - return result.reshape ( size ) +import numpy as np + +def random_normal( size=(1,), trunc_val = 2.5 ): + len = np.array(size).prod() + result = np.empty ( (len,) , dtype=np.float32) + + for i in range (len): + while True: + x = np.random.normal() + if x >= -trunc_val and x <= trunc_val: + break + result[i] = (x / trunc_val) + + return result.reshape ( size ) diff --git a/utils/std_utils.py b/utils/std_utils.py index 3e977fa..2f23be9 
100644 --- a/utils/std_utils.py +++ b/utils/std_utils.py @@ -1,36 +1,36 @@ -import os -import sys - -class suppress_stdout_stderr(object): - def __enter__(self): - self.outnull_file = open(os.devnull, 'w') - self.errnull_file = open(os.devnull, 'w') - - self.old_stdout_fileno_undup = sys.stdout.fileno() - self.old_stderr_fileno_undup = sys.stderr.fileno() - - self.old_stdout_fileno = os.dup ( sys.stdout.fileno() ) - self.old_stderr_fileno = os.dup ( sys.stderr.fileno() ) - - self.old_stdout = sys.stdout - self.old_stderr = sys.stderr - - os.dup2 ( self.outnull_file.fileno(), self.old_stdout_fileno_undup ) - os.dup2 ( self.errnull_file.fileno(), self.old_stderr_fileno_undup ) - - sys.stdout = self.outnull_file - sys.stderr = self.errnull_file - return self - - def __exit__(self, *_): - sys.stdout = self.old_stdout - sys.stderr = self.old_stderr - - os.dup2 ( self.old_stdout_fileno, self.old_stdout_fileno_undup ) - os.dup2 ( self.old_stderr_fileno, self.old_stderr_fileno_undup ) - - os.close ( self.old_stdout_fileno ) - os.close ( self.old_stderr_fileno ) - - self.outnull_file.close() - self.errnull_file.close() +import os +import sys + +class suppress_stdout_stderr(object): + def __enter__(self): + self.outnull_file = open(os.devnull, 'w') + self.errnull_file = open(os.devnull, 'w') + + self.old_stdout_fileno_undup = sys.stdout.fileno() + self.old_stderr_fileno_undup = sys.stderr.fileno() + + self.old_stdout_fileno = os.dup ( sys.stdout.fileno() ) + self.old_stderr_fileno = os.dup ( sys.stderr.fileno() ) + + self.old_stdout = sys.stdout + self.old_stderr = sys.stderr + + os.dup2 ( self.outnull_file.fileno(), self.old_stdout_fileno_undup ) + os.dup2 ( self.errnull_file.fileno(), self.old_stderr_fileno_undup ) + + sys.stdout = self.outnull_file + sys.stderr = self.errnull_file + return self + + def __exit__(self, *_): + sys.stdout = self.old_stdout + sys.stderr = self.old_stderr + + os.dup2 ( self.old_stdout_fileno, self.old_stdout_fileno_undup ) + os.dup2 ( self.old_stderr_fileno, self.old_stderr_fileno_undup ) + + os.close ( self.old_stdout_fileno ) + os.close ( self.old_stderr_fileno ) + + self.outnull_file.close() + self.errnull_file.close() diff --git a/utils/struct_utils.py b/utils/struct_utils.py index ff79a39..cc63559 100644 --- a/utils/struct_utils.py +++ b/utils/struct_utils.py @@ -1,5 +1,5 @@ -import struct - -def struct_unpack(data, counter, fmt): - fmt_size = struct.calcsize(fmt) - return (counter+fmt_size,) + struct.unpack (fmt, data[counter:counter+fmt_size]) +import struct + +def struct_unpack(data, counter, fmt): + fmt_size = struct.calcsize(fmt) + return (counter+fmt_size,) + struct.unpack (fmt, data[counter:counter+fmt_size])
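A note on the cv2_utils helpers in the diff above: cv2.imread and cv2.imwrite cannot handle paths with non-ASCII characters on some platforms, so the wrappers do the file I/O with plain open() and move the encode/decode step into memory via cv2.imdecode/cv2.imencode. A minimal round-trip sketch, assuming the repository root is on sys.path; the file name is hypothetical:

    import numpy as np
    from utils.cv2_utils import cv2_imread, cv2_imwrite

    img = np.zeros((64, 64, 3), dtype=np.uint8)   # dummy image
    path = "лицо_01.png"                          # hypothetical non-ASCII file name
    cv2_imwrite(path, img)                        # encodes in memory, then writes raw bytes
    loaded = cv2_imread(path)                     # reads raw bytes, then decodes in memory
    assert loaded is not None and loaded.shape == img.shape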
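The SubprocessGenerator in iter_utils.py runs a generator factory in a child process: items travel to the consumer over cs_queue, and the consumer acknowledges each item on sc_queue, so the child never gets more than `prefetch` items ahead. A usage sketch under the assumption that the factory is a picklable top-level function (the name `squares` is illustrative); on Windows the spawn start method also needs the usual __main__ guard:

    from utils.iter_utils import SubprocessGenerator

    def squares(n):            # called once in the child process with user_param
        for i in range(n):
            yield i * i

    if __name__ == "__main__":
        gen = SubprocessGenerator(squares, user_param=5, prefetch=2)
        print(list(gen))       # [0, 1, 4, 9, 16]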
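suppress_stdout_stderr duplicates and redirects the underlying file descriptors with os.dup/os.dup2 rather than only rebinding sys.stdout and sys.stderr, which is why it also silences output written by native code below the Python layer. A short sketch:

    from utils.std_utils import suppress_stdout_stderr

    with suppress_stdout_stderr():
        print("hidden")        # fd-level redirect, so native library output is hidden too
    print("visible again")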
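random_normal rejection-samples a standard normal until |x| <= trunc_val and then divides by trunc_val, so every returned value lands in [-1, 1]. A quick sketch:

    from utils.random_utils import random_normal

    r = random_normal(size=(2, 3))   # truncated, rescaled normal samples
    assert r.shape == (2, 3) and (abs(r) <= 1.0).all()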
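struct_unpack returns the advanced offset as the first element of its result tuple, so consecutive reads can thread a cursor through a byte buffer without manual size bookkeeping. A sketch:

    import struct
    from utils.struct_utils import struct_unpack

    data = struct.pack('<I', 7) + struct.pack('<d', 2.5)
    counter = 0
    counter, n = struct_unpack(data, counter, '<I')   # counter -> 4,  n == 7
    counter, x = struct_unpack(data, counter, '<d')   # counter -> 12, x == 2.5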