mirror of https://github.com/menyifang/DCT-Net
commit 32c9f31e7b (parent c08e84e872): update
@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="pytest" />
  </component>
</module>
@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PublishConfigData" autoUpload="Always" serverName="11.160.138.24">
    <serverData>
      <paths name="11.160.138.24">
        <serverdata>
          <mappings>
            <mapping deploy="/" local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
    </serverData>
    <option name="myAutoUpload" value="ALWAYS" />
  </component>
</project>
@ -0,0 +1,110 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="97">
            <item index="0" class="java.lang.String" itemvalue="GPUtil" />
            <item index="1" class="java.lang.String" itemvalue="torch" />
            <item index="2" class="java.lang.String" itemvalue="torchvision" />
            <item index="3" class="java.lang.String" itemvalue="pandas" />
            <item index="4" class="java.lang.String" itemvalue="scikit-image" />
            <item index="5" class="java.lang.String" itemvalue="scipy" />
            <item index="6" class="java.lang.String" itemvalue="opencv-python" />
            <item index="7" class="java.lang.String" itemvalue="numpy" />
            <item index="8" class="java.lang.String" itemvalue="Pillow" />
            <item index="9" class="java.lang.String" itemvalue="protobuf" />
            <item index="10" class="java.lang.String" itemvalue="decorator" />
            <item index="11" class="java.lang.String" itemvalue="networkx" />
            <item index="12" class="java.lang.String" itemvalue="scikit-learn" />
            <item index="13" class="java.lang.String" itemvalue="python-dateutil" />
            <item index="14" class="java.lang.String" itemvalue="imageio-ffmpeg" />
            <item index="15" class="java.lang.String" itemvalue="cloudpickle" />
            <item index="16" class="java.lang.String" itemvalue="requests" />
            <item index="17" class="java.lang.String" itemvalue="PyWavelets" />
            <item index="18" class="java.lang.String" itemvalue="certifi" />
            <item index="19" class="java.lang.String" itemvalue="urllib3" />
            <item index="20" class="java.lang.String" itemvalue="pyparsing" />
            <item index="21" class="java.lang.String" itemvalue="six" />
            <item index="22" class="java.lang.String" itemvalue="ffmpeg-python" />
            <item index="23" class="java.lang.String" itemvalue="kiwisolver" />
            <item index="24" class="java.lang.String" itemvalue="tqdm" />
            <item index="25" class="java.lang.String" itemvalue="imageio" />
            <item index="26" class="java.lang.String" itemvalue="toolz" />
            <item index="27" class="java.lang.String" itemvalue="future" />
            <item index="28" class="java.lang.String" itemvalue="matplotlib" />
            <item index="29" class="java.lang.String" itemvalue="tensorboardX" />
            <item index="30" class="java.lang.String" itemvalue="dask" />
            <item index="31" class="java.lang.String" itemvalue="pytz" />
            <item index="32" class="java.lang.String" itemvalue="idna" />
            <item index="33" class="java.lang.String" itemvalue="PyYAML" />
            <item index="34" class="java.lang.String" itemvalue="cffi" />
            <item index="35" class="java.lang.String" itemvalue="pycparser" />
            <item index="36" class="java.lang.String" itemvalue="pygit" />
            <item index="37" class="java.lang.String" itemvalue="Werkzeug" />
            <item index="38" class="java.lang.String" itemvalue="blessings" />
            <item index="39" class="java.lang.String" itemvalue="wget" />
            <item index="40" class="java.lang.String" itemvalue="dominate" />
            <item index="41" class="java.lang.String" itemvalue="psutil" />
            <item index="42" class="java.lang.String" itemvalue="torchprofile" />
            <item index="43" class="java.lang.String" itemvalue="tensorboard" />
            <item index="44" class="java.lang.String" itemvalue="grpcio" />
            <item index="45" class="java.lang.String" itemvalue="olefile" />
            <item index="46" class="java.lang.String" itemvalue="Markdown" />
            <item index="47" class="java.lang.String" itemvalue="pycocotools" />
            <item index="48" class="java.lang.String" itemvalue="nvidia-ml-py3" />
            <item index="49" class="java.lang.String" itemvalue="jedi" />
            <item index="50" class="java.lang.String" itemvalue="MNNCV" />
            <item index="51" class="java.lang.String" itemvalue="boto3" />
            <item index="52" class="java.lang.String" itemvalue="watchdog" />
            <item index="53" class="java.lang.String" itemvalue="botocore" />
            <item index="54" class="java.lang.String" itemvalue="validators" />
            <item index="55" class="java.lang.String" itemvalue="streamlit" />
            <item index="56" class="java.lang.String" itemvalue="toml" />
            <item index="57" class="java.lang.String" itemvalue="MNN" />
            <item index="58" class="java.lang.String" itemvalue="pyrsistent" />
            <item index="59" class="java.lang.String" itemvalue="pytorch-fid" />
            <item index="60" class="java.lang.String" itemvalue="visdom" />
            <item index="61" class="java.lang.String" itemvalue="lpips" />
            <item index="62" class="java.lang.String" itemvalue="joblib" />
            <item index="63" class="java.lang.String" itemvalue="oss2" />
            <item index="64" class="java.lang.String" itemvalue="imgaug" />
            <item index="65" class="java.lang.String" itemvalue="opencv_python" />
            <item index="66" class="java.lang.String" itemvalue="absl-py" />
            <item index="67" class="java.lang.String" itemvalue="wandb" />
            <item index="68" class="java.lang.String" itemvalue="opencv-python-headless" />
            <item index="69" class="java.lang.String" itemvalue="ninja" />
            <item index="70" class="java.lang.String" itemvalue="chardet" />
            <item index="71" class="java.lang.String" itemvalue="cycler" />
            <item index="72" class="java.lang.String" itemvalue="kornia" />
            <item index="73" class="java.lang.String" itemvalue="pytorch-lightning" />
            <item index="74" class="java.lang.String" itemvalue="trimesh" />
            <item index="75" class="java.lang.String" itemvalue="omegaconf" />
            <item index="76" class="java.lang.String" itemvalue="opencv-contrib-python" />
            <item index="77" class="java.lang.String" itemvalue="pyglet" />
            <item index="78" class="java.lang.String" itemvalue="PyMCubes" />
            <item index="79" class="java.lang.String" itemvalue="chumpy" />
            <item index="80" class="java.lang.String" itemvalue="plyfile" />
            <item index="81" class="java.lang.String" itemvalue="yacs" />
            <item index="82" class="java.lang.String" itemvalue="torchmetrics" />
            <item index="83" class="java.lang.String" itemvalue="dataclasses" />
            <item index="84" class="java.lang.String" itemvalue="test-tube" />
            <item index="85" class="java.lang.String" itemvalue="rtree" />
            <item index="86" class="java.lang.String" itemvalue="lark-parser" />
            <item index="87" class="java.lang.String" itemvalue="PyEXR" />
            <item index="88" class="java.lang.String" itemvalue="commentjson" />
            <item index="89" class="java.lang.String" itemvalue="pybind11" />
            <item index="90" class="java.lang.String" itemvalue="open3d" />
            <item index="91" class="java.lang.String" itemvalue="opendr" />
            <item index="92" class="java.lang.String" itemvalue="h5py" />
            <item index="93" class="java.lang.String" itemvalue="tokenizers" />
            <item index="94" class="java.lang.String" itemvalue="transformers" />
            <item index="95" class="java.lang.String" itemvalue="yapf" />
            <item index="96" class="java.lang.String" itemvalue="addict" />
          </list>
        </value>
      </option>
    </inspection_tool>
  </profile>
</component>
@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="JavaScriptSettings">
    <option name="languageLevel" value="ES6" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (mnn_python)" project-jdk-type="Python SDK" />
</project>
@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/DCT-Net.iml" filepath="$PROJECT_DIR$/.idea/DCT-Net.iml" />
    </modules>
  </component>
</project>
@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="WebServers">
    <option name="servers">
      <webServer id="97e6de3f-73ca-4b14-a177-10b25899d222" name="11.160.138.24" url="http://11.160.138.24">
        <fileTransfer rootFolder="/data/qingyao/gitProjects/DCT-Net" accessType="SFTP" host="11.160.138.24" port="22" sshConfigId="80d63b94-77a9-490e-a638-71580f352c36" sshConfig="myf272609@11.160.138.24:22 password">
          <advancedOptions>
            <advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
          </advancedOptions>
        </fileTransfer>
      </webServer>
    </option>
  </component>
</project>
Binary file not shown (13 MiB image).
@ -0,0 +1,4 @@
from modelscope.hub.snapshot_download import snapshot_download

model_dir = snapshot_download('damo/cv_unet_person-image-cartoon_compound-models', cache_dir='.')
@ -0,0 +1,23 @@
import cv2
from source.cartoonize import Cartoonizer
import os


def process():
    algo = Cartoonizer(dataroot='damo/cv_unet_person-image-cartoon_compound-models')
    img = cv2.imread('input.png')[..., ::-1]  # BGR -> RGB, as cartoonize() expects RGB

    result = algo.cartoonize(img)

    cv2.imwrite('res.png', result)
    print('finished!')


if __name__ == '__main__':
    process()
@ -0,0 +1,12 @@
import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

img_cartoon = pipeline(Tasks.image_portrait_stylization, 'damo/cv_unet_person-image-cartoon_compound-models')
# alternatively, build the pipeline by task name with the default model:
# img_cartoon = pipeline('image-portrait-stylization')
result = img_cartoon('input.png')

cv2.imwrite('result.png', result['output_img'])
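Since the pipeline object can be reused across calls, it also works for a folder of images; a minimal sketch (the directory names are illustrative, not from the repo):

```python
import glob
import os

os.makedirs('outputs', exist_ok=True)
for path in sorted(glob.glob('inputs/*.png')):
    out = img_cartoon(path)  # reuse the pipeline built above
    cv2.imwrite(os.path.join('outputs', os.path.basename(path)), out['output_img'])
```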
Binary file not shown.
@ -0,0 +1,120 @@
import os
import cv2
import tensorflow as tf
import numpy as np
from source.facelib.facer import FaceAna
import source.utils as utils
from source.mtcnn_pytorch.src.align_trans import warp_and_crop_face, get_reference_facial_points

if tf.__version__ >= '2.0':
    tf = tf.compat.v1
    tf.disable_eager_execution()


class Cartoonizer():

    def __init__(self, dataroot):
        self.facer = FaceAna(dataroot)
        self.sess_head = self.load_sess(
            os.path.join(dataroot, 'cartoon_anime_h.pb'), 'model_head')
        self.sess_bg = self.load_sess(
            os.path.join(dataroot, 'cartoon_anime_bg.pb'), 'model_bg')

        self.box_width = 288
        global_mask = cv2.imread(os.path.join(dataroot, 'alpha.jpg'))
        global_mask = cv2.resize(
            global_mask, (self.box_width, self.box_width),
            interpolation=cv2.INTER_AREA)
        self.global_mask = cv2.cvtColor(
            global_mask, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0

    def load_sess(self, model_path, name):
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print(f'loading model from {model_path}')
        with tf.gfile.FastGFile(model_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            sess.graph.as_default()
            tf.import_graph_def(graph_def, name=name)
            sess.run(tf.global_variables_initializer())
        print(f'load model {model_path} done.')
        return sess

    def detect_face(self, img):
        src_h, src_w, _ = img.shape
        src_x = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        boxes, landmarks, _ = self.facer.run(src_x)
        if boxes.shape[0] == 0:
            return None
        else:
            return landmarks

    def cartoonize(self, img):
        # img: RGB input
        ori_h, ori_w, _ = img.shape
        img = utils.resize_size(img, size=720)

        img_bgr = img[:, :, ::-1]  # RGB -> BGR

        # background process
        pad_bg, pad_h, pad_w = utils.padTo16x(img_bgr)

        bg_res = self.sess_bg.run(
            self.sess_bg.graph.get_tensor_by_name(
                'model_bg/output_image:0'),
            feed_dict={'model_bg/input_image:0': pad_bg})
        res = bg_res[:pad_h, :pad_w, :]

        landmarks = self.detect_face(img_bgr)
        if landmarks is None:
            print('No face detected!')
            return res

        print('%d faces detected!' % len(landmarks))
        for landmark in landmarks:
            # get the 5 facial points (eyes, nose, mouth corners)
            f5p = utils.get_f5p(landmark, img_bgr)

            # face alignment
            head_img, trans_inv = warp_and_crop_face(
                img,
                f5p,
                ratio=0.75,
                reference_pts=get_reference_facial_points(default_square=True),
                crop_size=(self.box_width, self.box_width),
                return_trans_inv=True)

            # head process
            head_res = self.sess_head.run(
                self.sess_head.graph.get_tensor_by_name(
                    'model_head/output_image:0'),
                feed_dict={
                    'model_head/input_image:0': head_img[:, :, ::-1]
                })

            # merge head and background
            head_trans_inv = cv2.warpAffine(
                head_res,
                trans_inv, (np.size(img, 1), np.size(img, 0)),
                borderValue=(0, 0, 0))

            mask = self.global_mask
            mask_trans_inv = cv2.warpAffine(
                mask,
                trans_inv, (np.size(img, 1), np.size(img, 0)),
                borderValue=(0, 0, 0))
            mask_trans_inv = np.expand_dims(mask_trans_inv, 2)

            res = mask_trans_inv * head_trans_inv + (1 - mask_trans_inv) * res

        res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA)

        return res
@ -0,0 +1,4 @@
Copyright (c) Peppa_Pig_Face_Engine

https://github.com/610265158/Peppa_Pig_Face_Engine
@ -0,0 +1,97 @@
import numpy as np

from modelscope.models.cv.cartoon.facelib.config import config as cfg


class GroupTrack():

    def __init__(self):
        self.old_frame = None
        self.previous_landmarks_set = None
        self.with_landmark = True
        self.thres = cfg.TRACE.pixel_thres
        self.alpha = cfg.TRACE.smooth_landmark
        self.iou_thres = cfg.TRACE.iou_thres

    def calculate(self, img, current_landmarks_set):
        if self.previous_landmarks_set is None:
            self.previous_landmarks_set = current_landmarks_set
            result = current_landmarks_set
        else:
            previous_lm_num = self.previous_landmarks_set.shape[0]
            if previous_lm_num == 0:
                self.previous_landmarks_set = current_landmarks_set
                result = current_landmarks_set
                return result
            else:
                result = []
                for i in range(current_landmarks_set.shape[0]):
                    not_in_flag = True
                    for j in range(previous_lm_num):
                        if self.iou(current_landmarks_set[i],
                                    self.previous_landmarks_set[j]
                                    ) > self.iou_thres:
                            result.append(
                                self.smooth(current_landmarks_set[i],
                                            self.previous_landmarks_set[j]))
                            not_in_flag = False
                            break
                    if not_in_flag:
                        result.append(current_landmarks_set[i])

            result = np.array(result)
            self.previous_landmarks_set = result

        return result

    def iou(self, p_set0, p_set1):
        rec1 = [
            np.min(p_set0[:, 0]),
            np.min(p_set0[:, 1]),
            np.max(p_set0[:, 0]),
            np.max(p_set0[:, 1])
        ]
        rec2 = [
            np.min(p_set1[:, 0]),
            np.min(p_set1[:, 1]),
            np.max(p_set1[:, 0]),
            np.max(p_set1[:, 1])
        ]

        # compute the area of each rectangle
        S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
        S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])

        # compute the sum of the areas
        sum_area = S_rec1 + S_rec2

        # find each edge of the intersection rectangle
        x1 = max(rec1[0], rec2[0])
        y1 = max(rec1[1], rec2[1])
        x2 = min(rec1[2], rec2[2])
        y2 = min(rec1[3], rec2[3])

        # judge whether there is an intersection
        intersect = max(0, x2 - x1) * max(0, y2 - y1)

        iou = intersect / (sum_area - intersect)
        return iou

    def smooth(self, now_landmarks, previous_landmarks):
        result = []
        for i in range(now_landmarks.shape[0]):
            x = now_landmarks[i][0] - previous_landmarks[i][0]
            y = now_landmarks[i][1] - previous_landmarks[i][1]
            dis = np.sqrt(np.square(x) + np.square(y))
            if dis < self.thres:
                result.append(previous_landmarks[i])
            else:
                result.append(
                    self.do_moving_average(now_landmarks[i],
                                           previous_landmarks[i]))

        return np.array(result)

    def do_moving_average(self, p_now, p_previous):
        p = self.alpha * p_now + (1 - self.alpha) * p_previous
        return p
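A minimal sketch (not from the repo) of how `GroupTrack.calculate` smooths landmarks across two frames; the import path and array shapes are assumptions based on the code above:

```python
import numpy as np
from source.facelib.LK.lk import GroupTrack  # assumed module path

tracker = GroupTrack()
frame1 = np.random.rand(1, 68, 2) * 100          # one face, 68 (x, y) landmarks
frame2 = frame1 + np.random.randn(1, 68, 2) * 3  # jittered copy of the same face

out1 = tracker.calculate(None, frame1)  # first call: returned unchanged
out2 = tracker.calculate(None, frame2)  # matched by IoU, then smoothed

# with smooth_landmark = 0.95, out2 = 0.95 * frame2 + 0.05 * frame1 for moving points
print(np.abs(out2 - frame2).max())
```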
@ -0,0 +1,23 @@
import os

import numpy as np
from easydict import EasyDict as edict

config = edict()
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

config.DETECT = edict()
config.DETECT.topk = 10
config.DETECT.thres = 0.8
config.DETECT.input_shape = (512, 512, 3)
config.KEYPOINTS = edict()
config.KEYPOINTS.p_num = 68
config.KEYPOINTS.base_extend_range = [0.2, 0.3]
config.KEYPOINTS.input_shape = (160, 160, 3)
config.TRACE = edict()
config.TRACE.pixel_thres = 1
config.TRACE.smooth_box = 0.3
config.TRACE.smooth_landmark = 0.95
config.TRACE.iou_thres = 0.5
config.DATA = edict()
config.DATA.pixel_means = np.array([123., 116., 103.])  # RGB
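Because the config is a plain EasyDict, values can be overridden after import and before the face modules are constructed; a small sketch (the import path and override value are illustrative):

```python
from source.facelib.config import config as cfg  # assumed import path

cfg.DETECT.thres = 0.6                # relax the detector score threshold (example value)
assert cfg['DETECT']['thres'] == 0.6  # EasyDict supports dict-style access too
```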
@ -0,0 +1,116 @@
import time

import cv2
import numpy as np
import tensorflow as tf

from .config import config as cfg

if tf.__version__ >= '2.0':
    tf = tf.compat.v1


class FaceDetector:

    def __init__(self, dir):
        self.model_path = dir + '/detector.pb'
        self.thres = cfg.DETECT.thres
        self.input_shape = cfg.DETECT.input_shape

        self._graph = tf.Graph()

        with self._graph.as_default():
            self._graph, self._sess = self.init_model(self.model_path)

            self.input_image = tf.get_default_graph().get_tensor_by_name(
                'tower_0/images:0')
            self.training = tf.get_default_graph().get_tensor_by_name(
                'training_flag:0')
            self.output_ops = [
                tf.get_default_graph().get_tensor_by_name('tower_0/boxes:0'),
                tf.get_default_graph().get_tensor_by_name('tower_0/scores:0'),
                tf.get_default_graph().get_tensor_by_name(
                    'tower_0/num_detections:0'),
            ]

    def __call__(self, image):
        image, scale_x, scale_y = self.preprocess(
            image,
            target_width=self.input_shape[1],
            target_height=self.input_shape[0])

        image = np.expand_dims(image, 0)

        boxes, scores, num_boxes = self._sess.run(
            self.output_ops,
            feed_dict={
                self.input_image: image,
                self.training: False
            })

        num_boxes = num_boxes[0]
        boxes = boxes[0][:num_boxes]
        scores = scores[0][:num_boxes]

        to_keep = scores > self.thres
        boxes = boxes[to_keep]
        scores = scores[to_keep]

        # map detector box coordinates back to the original image scale
        y1 = self.input_shape[0] / scale_y
        x1 = self.input_shape[1] / scale_x
        y2 = self.input_shape[0] / scale_y
        x2 = self.input_shape[1] / scale_x
        scaler = np.array([y1, x1, y2, x2], dtype='float32')
        boxes = boxes * scaler

        scores = np.expand_dims(scores, 0).reshape([-1, 1])

        # reorder from (y1, x1, y2, x2) to (x1, y1, x2, y2)
        for i in range(boxes.shape[0]):
            boxes[i] = np.array(
                [boxes[i][1], boxes[i][0], boxes[i][3], boxes[i][2]])
        return np.concatenate([boxes, scores], axis=1)

    def preprocess(self, image, target_height, target_width, label=None):
        h, w, c = image.shape

        # letterbox: paste the aspect-preserving resize onto a mean-pixel canvas
        bimage = np.zeros(
            shape=[target_height, target_width, c],
            dtype=image.dtype) + np.array(
                cfg.DATA.pixel_means, dtype=image.dtype)
        long_side = max(h, w)

        scale_x = scale_y = target_height / long_side

        image = cv2.resize(image, None, fx=scale_x, fy=scale_y)

        h_, w_, _ = image.shape
        bimage[:h_, :w_, :] = image

        return bimage, scale_x, scale_y

    def init_model(self, *args):
        pb_path = args[0]

        def init_pb(model_path):
            config = tf.ConfigProto()
            config.gpu_options.per_process_gpu_memory_fraction = 0.2
            compute_graph = tf.Graph()
            compute_graph.as_default()
            sess = tf.Session(config=config)
            with tf.gfile.GFile(model_path, 'rb') as fid:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(fid.read())
                tf.import_graph_def(graph_def, name='')

            return (compute_graph, sess)

        model = init_pb(pb_path)

        graph = model[0]
        sess = model[1]

        return graph, sess
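The `preprocess` letterboxing scales by the long side only, so aspect ratio is preserved and the remainder of the canvas keeps the mean pixel value; a quick check of the math (frame size is illustrative):

```python
h, w = 1080, 1920                      # input frame
target = 512                           # cfg.DETECT.input_shape[0]
scale = target / max(h, w)             # 0.2666..., applied to both axes
print(int(h * scale), int(w * scale))  # 288 512 -> pasted top-left on a 512x512 canvas
```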
@ -0,0 +1,154 @@
import cv2
import numpy as np
import tensorflow as tf

from .config import config as cfg

if tf.__version__ >= '2.0':
    tf = tf.compat.v1


class FaceLandmark:

    def __init__(self, dir):
        self.model_path = dir + '/keypoints.pb'
        self.min_face = 60
        self.keypoint_num = cfg.KEYPOINTS.p_num * 2

        self._graph = tf.Graph()

        with self._graph.as_default():

            self._graph, self._sess = self.init_model(self.model_path)
            self.img_input = tf.get_default_graph().get_tensor_by_name(
                'tower_0/images:0')
            self.embeddings = tf.get_default_graph().get_tensor_by_name(
                'tower_0/prediction:0')
            self.training = tf.get_default_graph().get_tensor_by_name(
                'training_flag:0')

            self.landmark = self.embeddings[:, :self.keypoint_num]
            self.headpose = self.embeddings[:, -7:-4] * 90.
            self.state = tf.nn.sigmoid(self.embeddings[:, -4:])

    def __call__(self, img, bboxes):
        landmark_result = []
        state_result = []
        for i, bbox in enumerate(bboxes):
            landmark, state = self._one_shot_run(img, bbox, i)
            if landmark is not None:
                landmark_result.append(landmark)
                state_result.append(state)
        return np.array(landmark_result), np.array(state_result)

    def simple_run(self, cropped_img):
        with self._graph.as_default():

            cropped_img = np.expand_dims(cropped_img, axis=0)
            landmark, p, states = self._sess.run(
                [self.landmark, self.headpose, self.state],
                feed_dict={
                    self.img_input: cropped_img,
                    self.training: False
                })

        return landmark, states

    def _one_shot_run(self, image, bbox, i):
        bbox_width = bbox[2] - bbox[0]
        bbox_height = bbox[3] - bbox[1]
        if (bbox_width <= self.min_face and bbox_height <= self.min_face):
            return None, None
        add = int(max(bbox_width, bbox_height))
        bimg = cv2.copyMakeBorder(
            image,
            add,
            add,
            add,
            add,
            borderType=cv2.BORDER_CONSTANT,
            value=cfg.DATA.pixel_means)
        bbox += add

        one_edge = (1 + 2 * cfg.KEYPOINTS.base_extend_range[0]) * bbox_width
        center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2]

        bbox[0] = center[0] - one_edge // 2
        bbox[1] = center[1] - one_edge // 2
        bbox[2] = center[0] + one_edge // 2
        bbox[3] = center[1] + one_edge // 2

        bbox = bbox.astype(np.int32)
        crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
        h, w, _ = crop_image.shape
        crop_image = cv2.resize(
            crop_image,
            (cfg.KEYPOINTS.input_shape[1], cfg.KEYPOINTS.input_shape[0]))
        crop_image = crop_image.astype(np.float32)

        keypoints, state = self.simple_run(crop_image)

        res = keypoints[0][:self.keypoint_num].reshape((-1, 2))
        res[:, 0] = res[:, 0] * w / cfg.KEYPOINTS.input_shape[1]
        res[:, 1] = res[:, 1] * h / cfg.KEYPOINTS.input_shape[0]

        landmark = []
        for _index in range(res.shape[0]):
            x_y = res[_index]
            landmark.append([
                int(x_y[0] * cfg.KEYPOINTS.input_shape[0] + bbox[0] - add),
                int(x_y[1] * cfg.KEYPOINTS.input_shape[1] + bbox[1] - add)
            ])

        landmark = np.array(landmark, np.float32)

        return landmark, state

    def init_model(self, *args):
        if len(args) == 1:
            use_pb = True
            pb_path = args[0]
        else:
            use_pb = False
            meta_path = args[0]
            restore_model_path = args[1]

        def ini_ckpt():
            graph = tf.Graph()
            graph.as_default()
            configProto = tf.ConfigProto()
            configProto.gpu_options.allow_growth = True
            sess = tf.Session(config=configProto)
            # load_model(model_path, sess)
            saver = tf.train.import_meta_graph(meta_path)
            saver.restore(sess, restore_model_path)

            print('Model restored!')
            return (graph, sess)

        def init_pb(model_path):
            config = tf.ConfigProto()
            config.gpu_options.per_process_gpu_memory_fraction = 0.2
            compute_graph = tf.Graph()
            compute_graph.as_default()
            sess = tf.Session(config=config)
            with tf.gfile.GFile(model_path, 'rb') as fid:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(fid.read())
                tf.import_graph_def(graph_def, name='')

            # saver = tf.train.Saver(tf.global_variables())
            # saver.save(sess, save_path='./tmp.ckpt')
            return (compute_graph, sess)

        if use_pb:
            model = init_pb(pb_path)
        else:
            model = ini_ckpt()

        graph = model[0]
        sess = model[1]

        return graph, sess
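The landmark crop extends the detector box by `base_extend_range` before resizing to the 160x160 network input; the expansion arithmetic in a one-line sketch:

```python
bbox_width = 100                       # illustrative detector box width
one_edge = (1 + 2 * 0.2) * bbox_width  # cfg.KEYPOINTS.base_extend_range[0] = 0.2
print(one_edge)                        # 140.0-pixel square crop, centered on the box
```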
@ -0,0 +1,150 @@
import time

import cv2
import numpy as np

from .config import config as cfg
from .face_detector import FaceDetector
from .face_landmark import FaceLandmark
from .LK.lk import GroupTrack


class FaceAna():
    '''
    By default, the top-k faces (sorted by area) are processed, for speed.
    '''

    def __init__(self, model_dir):
        self.face_detector = FaceDetector(model_dir)
        self.face_landmark = FaceLandmark(model_dir)
        self.trace = GroupTrack()

        self.track_box = None
        self.previous_image = None
        self.previous_box = None

        self.diff_thres = 5
        self.top_k = cfg.DETECT.topk
        self.iou_thres = cfg.TRACE.iou_thres
        self.alpha = cfg.TRACE.smooth_box

    def run(self, image):
        boxes = self.face_detector(image)

        if boxes.shape[0] > self.top_k:
            boxes = self.sort(boxes)

        boxes_return = np.array(boxes)
        landmarks, states = self.face_landmark(image, boxes)

        # derive a tight box from each landmark set, then smooth it against
        # the detector boxes
        track = []
        for i in range(landmarks.shape[0]):
            track.append([
                np.min(landmarks[i][:, 0]),
                np.min(landmarks[i][:, 1]),
                np.max(landmarks[i][:, 0]),
                np.max(landmarks[i][:, 1])
            ])
        tmp_box = np.array(track)

        self.track_box = self.judge_boxs(boxes_return, tmp_box)

        self.track_box, landmarks = self.sort_res(self.track_box, landmarks)
        return self.track_box, landmarks, states

    def sort_res(self, bboxes, points):
        area = []
        for bbox in bboxes:
            bbox_width = bbox[2] - bbox[0]
            bbox_height = bbox[3] - bbox[1]
            area.append(bbox_height * bbox_width)

        area = np.array(area)
        picked = area.argsort()[::-1]
        sorted_bboxes = [bboxes[x] for x in picked]
        sorted_points = [points[x] for x in picked]
        return np.array(sorted_bboxes), np.array(sorted_points)

    def diff_frames(self, previous_frame, image):
        if previous_frame is None:
            return True
        else:
            _diff = cv2.absdiff(previous_frame, image)
            diff = np.sum(
                _diff) / previous_frame.shape[0] / previous_frame.shape[1] / 3.
            return diff > self.diff_thres

    def sort(self, bboxes):
        if self.top_k > 100:
            return bboxes
        area = []
        for bbox in bboxes:
            bbox_width = bbox[2] - bbox[0]
            bbox_height = bbox[3] - bbox[1]
            area.append(bbox_height * bbox_width)

        area = np.array(area)

        picked = area.argsort()[-self.top_k:][::-1]
        sorted_bboxes = [bboxes[x] for x in picked]
        return np.array(sorted_bboxes)

    def judge_boxs(self, previous_bboxs, now_bboxs):

        def iou(rec1, rec2):
            # compute the area of each rectangle
            S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
            S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])

            # compute the sum of the areas
            sum_area = S_rec1 + S_rec2

            # find each edge of the intersection rectangle
            x1 = max(rec1[0], rec2[0])
            y1 = max(rec1[1], rec2[1])
            x2 = min(rec1[2], rec2[2])
            y2 = min(rec1[3], rec2[3])

            # judge whether there is an intersection
            intersect = max(0, x2 - x1) * max(0, y2 - y1)

            return intersect / (sum_area - intersect)

        if previous_bboxs is None:
            return now_bboxs

        result = []

        for i in range(now_bboxs.shape[0]):
            contain = False
            for j in range(previous_bboxs.shape[0]):
                if iou(now_bboxs[i], previous_bboxs[j]) > self.iou_thres:
                    result.append(
                        self.smooth(now_bboxs[i], previous_bboxs[j]))
                    contain = True
                    break
            if not contain:
                result.append(now_bboxs[i])

        return np.array(result)

    def smooth(self, now_box, previous_box):
        return self.do_moving_average(now_box[:4], previous_box[:4])

    def do_moving_average(self, p_now, p_previous):
        p = self.alpha * p_now + (1 - self.alpha) * p_previous
        return p

    def reset(self):
        '''
        Reset the previous info used for tracking.
        :return:
        '''
        self.track_box = None
        self.previous_image = None
        self.previous_box = None
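A minimal end-to-end sketch of `FaceAna` (the import path and image file are assumptions; `model_dir` must contain `detector.pb` and `keypoints.pb` as loaded above):

```python
import cv2
from source.facelib.facer import FaceAna  # assumed module path

facer = FaceAna('damo/cv_unet_person-image-cartoon_compound-models')
img_bgr = cv2.imread('input.png')
boxes, landmarks, states = facer.run(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
print(boxes.shape, landmarks.shape)  # (N, 4) smoothed boxes, (N, 68, 2) landmarks
```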
@ -0,0 +1,21 @@
MIT License

Copyright (c) 2017 Dan Antoshchenko

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@ -0,0 +1,26 @@
# MTCNN

`pytorch` implementation of the **inference stage** of the face detection algorithm described in
[Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878).

## Example
![example of a face detection](images/example.png)

## How to use it
Just download the repository and then do this

```python
from src import detect_faces
from PIL import Image

image = Image.open('image.jpg')
bounding_boxes, landmarks = detect_faces(image)
```
For examples see `test_on_images.ipynb`.

## Requirements
* pytorch 0.2
* Pillow, numpy

## Credit
This implementation is heavily inspired by:
* [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection)
@ -0,0 +1,187 @@
"""
Created on Mon Apr 24 15:43:29 2017
@author: zhaoy
"""
import cv2
import numpy as np

from .matlab_cp2tform import get_similarity_transform_for_cv2

# reference facial points, a list of coordinates (x, y)
dx = 1
dy = 1
REFERENCE_FACIAL_POINTS = [
    [30.29459953 + dx, 51.69630051 + dy],  # left eye
    [65.53179932 + dx, 51.50139999 + dy],  # right eye
    [48.02519989 + dx, 71.73660278 + dy],  # nose
    [33.54930115 + dx, 92.3655014 + dy],  # left mouth
    [62.72990036 + dx, 92.20410156 + dy]  # right mouth
]

DEFAULT_CROP_SIZE = (96, 112)

global FACIAL_POINTS


class FaceWarpException(Exception):

    def __str__(self):
        return 'In File {}:{}'.format(__file__, super().__str__())


def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):

    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    h_crop = tmp_crop_size[0]
    w_crop = tmp_crop_size[1]
    if output_size:
        if output_size[0] == h_crop and output_size[1] == w_crop:
            return tmp_5pts

    if inner_padding_factor == 0 and outer_padding == (0, 0):
        if output_size is None:
            return tmp_5pts
        else:
            raise FaceWarpException(
                'No paddings to do, output_size must be None or {}'.format(
                    tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    factor = inner_padding_factor > 0 or outer_padding[0] > 0
    factor = factor or outer_padding[1] > 0
    if factor and output_size is None:
        output_size = (tmp_crop_size
                       * (1 + inner_padding_factor * 2)).astype(np.int32)
        output_size += np.array(outer_padding)

    cond1 = outer_padding[0] < output_size[0]
    cond2 = outer_padding[1] < output_size[1]
    if not (cond1 and cond2):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according to inner_padding_factor
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # 2) resize the padded inner region
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[
            1] * tmp_crop_size[0]:
        raise FaceWarpException(
            'Must have (output_size - outer_padding)'
            '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)

    return reference_5point


def get_affine_transform_matrix(src_pts, dst_pts):

    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)

    if rank == 3:
        tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]],
                          [A[0, 1], A[1, 1], A[2, 1]]])
    elif rank == 2:
        tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])

    return tfm


def warp_and_crop_face(src_img,
                       facial_pts,
                       ratio=0.84,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='similarity',
                       return_trans_inv=False):

    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(
                output_size, inner_padding_factor, outer_padding,
                default_square)

    ref_pts = np.float32(reference_pts)

    # shrink the reference points toward the crop center by 'ratio',
    # then rescale them to the requested crop size
    factor = ratio
    ref_pts = (ref_pts - 112 / 2) * factor + 112 / 2
    ref_pts *= crop_size[0] / 112.

    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts, ref_pts)
        tfm_inv = cv2.getAffineTransform(ref_pts, src_pts)

    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
        tfm_inv = get_affine_transform_matrix(ref_pts, src_pts)
    else:
        tfm, tfm_inv = get_similarity_transform_for_cv2(src_pts, ref_pts)

    face_img = cv2.warpAffine(
        src_img,
        tfm, (crop_size[0], crop_size[1]),
        borderValue=(255, 255, 255))

    if return_trans_inv:
        return face_img, tfm_inv
    else:
        return face_img
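`get_reference_facial_points(default_square=True)` is what `Cartoonizer` passes into `warp_and_crop_face`; a quick sketch of what it returns (import path taken from cartoonize.py above):

```python
from source.mtcnn_pytorch.src.align_trans import get_reference_facial_points

ref = get_reference_facial_points(default_square=True)
print(ref.shape)  # (5, 2): eyes, nose, mouth corners inside a 112x112 square
```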
@ -0,0 +1,339 @@
"""
Created on Tue Jul 11 06:54:28 2017

@author: zhaoyafei
"""

import numpy as np
from numpy.linalg import inv, lstsq
from numpy.linalg import matrix_rank as rank
from numpy.linalg import norm


class MatlabCp2tormException(Exception):

    def __str__(self):
        return 'In File {}:{}'.format(__file__, super().__str__())


def tformfwd(trans, uv):
    """
    Function:
    ----------
    apply affine transform 'trans' to uv

    Parameters:
    ----------
    @trans: 3x3 np.array
        transform matrix
    @uv: Kx2 np.array
        each row is a pair of coordinates (x, y)

    Returns:
    ----------
    @xy: Kx2 np.array
        each row is a pair of transformed coordinates (x, y)
    """
    uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xy = np.dot(uv, trans)
    xy = xy[:, 0:-1]
    return xy


def tforminv(trans, uv):
    """
    Function:
    ----------
    apply the inverse of affine transform 'trans' to uv

    Parameters:
    ----------
    @trans: 3x3 np.array
        transform matrix
    @uv: Kx2 np.array
        each row is a pair of coordinates (x, y)

    Returns:
    ----------
    @xy: Kx2 np.array
        each row is a pair of inverse-transformed coordinates (x, y)
    """
    Tinv = inv(trans)
    xy = tformfwd(Tinv, uv)
    return xy


def findNonreflectiveSimilarity(uv, xy, options=None):

    if options is None:
        options = {'K': 2}

    K = options['K']
    M = xy.shape[0]
    x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector

    tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
    tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
    X = np.vstack((tmp1, tmp2))

    u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    U = np.vstack((u, v))

    # We know that X * r = U
    if rank(X) >= 2 * K:
        r, _, _, _ = lstsq(X, U, rcond=None)
        r = np.squeeze(r)
    else:
        raise Exception('cp2tform:twoUniquePointsReq')

    sc = r[0]
    ss = r[1]
    tx = r[2]
    ty = r[3]

    Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])

    T = inv(Tinv)

    T[:, 2] = np.array([0, 0, 1])

    return T, Tinv


def findSimilarity(uv, xy, options=None):

    if options is None:
        options = {'K': 2}

    # Solve for trans1
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Solve for trans2

    # manually reflect the xy data across the Y-axis
    # (copy first so the caller's array is not mutated)
    xyR = xy.copy()
    xyR[:, 0] = -1 * xyR[:, 0]

    trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)

    # manually reflect the tform to undo the reflection done on xyR
    TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])

    trans2 = np.dot(trans2r, TreflectY)

    # Figure out if trans1 or trans2 is better
    xy1 = tformfwd(trans1, uv)
    norm1 = norm(xy1 - xy)

    xy2 = tformfwd(trans2, uv)
    norm2 = norm(xy2 - xy)

    if norm1 <= norm2:
        return trans1, trans1_inv
    else:
        trans2_inv = inv(trans2)
        return trans2, trans2_inv


def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
    Find Similarity Transform Matrix 'trans':
        u = src_pts[:, 0]
        v = src_pts[:, 1]
        x = dst_pts[:, 0]
        y = dst_pts[:, 1]
        [x, y, 1] = [u, v, 1] * trans

    Parameters:
    ----------
    @src_pts: Kx2 np.array
        source points, each row is a pair of coordinates (x, y)
    @dst_pts: Kx2 np.array
        destination points, each row is a pair of transformed
        coordinates (x, y)
    @reflective: True or False
        if True:
            use reflective similarity transform
        else:
            use non-reflective similarity transform

    Returns:
    ----------
    @trans: 3x3 np.array
        transform matrix from uv to xy
    @trans_inv: 3x3 np.array
        inverse of trans, transform matrix from xy to uv
    """

    if reflective:
        trans, trans_inv = findSimilarity(src_pts, dst_pts)
    else:
        trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)

    return trans, trans_inv


def cvt_tform_mat_for_cv2(trans):
    """
    Function:
    ----------
    Convert Transform Matrix 'trans' into 'cv2_trans' which could be
    directly used by cv2.warpAffine():
        u = src_pts[:, 0]
        v = src_pts[:, 1]
        x = dst_pts[:, 0]
        y = dst_pts[:, 1]
        [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
    @trans: 3x3 np.array
        transform matrix from uv to xy

    Returns:
    ----------
    @cv2_trans: 2x3 np.array
        transform matrix from src_pts to dst_pts, could be directly used
        for cv2.warpAffine()
    """
    cv2_trans = trans[:, 0:2].T

    return cv2_trans


def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
    Find Similarity Transform Matrix 'cv2_trans' which could be
    directly used by cv2.warpAffine():
        u = src_pts[:, 0]
        v = src_pts[:, 1]
        x = dst_pts[:, 0]
        y = dst_pts[:, 1]
        [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
    @src_pts: Kx2 np.array
        source points, each row is a pair of coordinates (x, y)
    @dst_pts: Kx2 np.array
        destination points, each row is a pair of transformed
        coordinates (x, y)
    @reflective: True or False
        if True:
            use reflective similarity transform
        else:
            use non-reflective similarity transform

    Returns:
    ----------
    @cv2_trans: 2x3 np.array
        transform matrix from src_pts to dst_pts, could be directly used
        for cv2.warpAffine()
    """
    trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
    cv2_trans = cvt_tform_mat_for_cv2(trans)
    cv2_trans_inv = cvt_tform_mat_for_cv2(trans_inv)

    return cv2_trans, cv2_trans_inv


if __name__ == '__main__':
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    # In Matlab, run:
    #
    #   uv = [u'; v'];
    #   xy = [x'; y'];
    #   tform_sim = cp2tform(uv, xy, 'similarity');
    #
    #   trans = tform_sim.tdata.T
    #   ans =
    #       -0.0764   -1.6190         0
    #        1.6190   -0.0764         0
    #       -3.2156    0.0290    1.0000
    #   trans_inv = tform_sim.tdata.Tinv
    #   ans =
    #       -0.0291    0.6163         0
    #       -0.6163   -0.0291         0
    #       -0.0756    1.9826    1.0000
    #   xy_m = tformfwd(tform_sim, u, v)
    #   xy_m =
    #       -3.2156    0.0290
    #        1.1833   -9.9143
    #        5.0323    2.8853
    #   uv_m = tforminv(tform_sim, x, y)
    #   uv_m =
    #        0.5698    1.3953
    #        6.0872    2.2733
    #       -2.6570    4.3314
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    uv = np.array((u, v)).T
    xy = np.array((x, y)).T

    print('\n--->uv:')
    print(uv)
    print('\n--->xy:')
    print(xy)

    trans, trans_inv = get_similarity_transform(uv, xy)

    print('\n--->trans matrix:')
    print(trans)

    print('\n--->trans_inv matrix:')
    print(trans_inv)

    print('\n---> apply transform to uv')
    print('\nxy_m = uv_augmented * trans')
    uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xy_m = np.dot(uv_aug, trans)
    print(xy_m)

    print('\nxy_m = tformfwd(trans, uv)')
    xy_m = tformfwd(trans, uv)
    print(xy_m)

    print('\n---> apply inverse transform to xy')
    print('\nuv_m = xy_augmented * trans_inv')
    xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
    uv_m = np.dot(xy_aug, trans_inv)
    print(uv_m)

    print('\nuv_m = tformfwd(trans_inv, xy)')
    uv_m = tformfwd(trans_inv, xy)
    print(uv_m)

    uv_m = tforminv(trans, xy)
    print('\nuv_m = tforminv(trans, xy)')
    print(uv_m)
@ -0,0 +1,107 @@
import os

import cv2
import numpy as np


def resize_size(image, size=720):
    h, w, c = np.shape(image)
    if min(h, w) > size:
        if h > w:
            h, w = int(size * h / w), size
        else:
            h, w = size, int(size * w / h)
    image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
    return image


def padTo16x(image):
    h, w, c = np.shape(image)
    if h % 16 == 0 and w % 16 == 0:
        return image, h, w
    nh, nw = (h // 16 + 1) * 16, (w // 16 + 1) * 16
    img_new = np.ones((nh, nw, 3), np.uint8) * 255
    img_new[:h, :w, :] = image

    return img_new, h, w


def get_f5p(landmarks, np_img):
    eye_left = find_pupil(landmarks[36:41], np_img)
    eye_right = find_pupil(landmarks[42:47], np_img)
    if eye_left is None or eye_right is None:
        print('cannot find pupils with find_pupil, using landmark mean instead!')
        eye_left = landmarks[36:41].mean(axis=0)
        eye_right = landmarks[42:47].mean(axis=0)
    nose = landmarks[30]
    mouth_left = landmarks[48]
    mouth_right = landmarks[54]
    f5p = [[eye_left[0], eye_left[1]], [eye_right[0], eye_right[1]],
           [nose[0], nose[1]], [mouth_left[0], mouth_left[1]],
           [mouth_right[0], mouth_right[1]]]
    return f5p


def find_pupil(landmarks, np_img):
    h, w, _ = np_img.shape
    xmax = int(landmarks[:, 0].max())
    xmin = int(landmarks[:, 0].min())
    ymax = int(landmarks[:, 1].max())
    ymin = int(landmarks[:, 1].min())

    if ymin >= ymax or xmin >= xmax or ymin < 0 or xmin < 0 or ymax > h or xmax > w:
        return None
    eye_img_bgr = np_img[ymin:ymax, xmin:xmax, :]
    eye_img = cv2.cvtColor(eye_img_bgr, cv2.COLOR_BGR2GRAY)
    eye_img = cv2.equalizeHist(eye_img)
    n_marks = landmarks - np.array([xmin, ymin]).reshape([1, 2])
    eye_mask = cv2.fillConvexPoly(
        np.zeros_like(eye_img), n_marks.astype(np.int32), 1)
    ret, thresh = cv2.threshold(eye_img, 100, 255,
                                cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    thresh = (1 - thresh / 255.) * eye_mask
    # take the median dark pixel inside the eye region as the pupil center
    cnt = 0
    xm = []
    ym = []
    for i in range(thresh.shape[0]):
        for j in range(thresh.shape[1]):
            if thresh[i, j] > 0.5:
                xm.append(j)
                ym.append(i)
                cnt += 1
    if cnt != 0:
        xm.sort()
        ym.sort()
        xm = xm[cnt // 2]
        ym = ym[cnt // 2]
    else:
        xm = thresh.shape[1] / 2
        ym = thresh.shape[0] / 2

    return xm + xmin, ym + ymin


def all_file(file_dir):
    L = []
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            extend = os.path.splitext(file)[1]
            if extend == '.png' or extend == '.jpg' or extend == '.jpeg':
                L.append(os.path.join(root, file))
    return L


def initialize_mask(box_width):
    h, w = [box_width, box_width]
    mask = np.zeros((h, w), np.uint8)

    center = (int(w / 2), int(h / 2))
    axes = (int(w * 0.4), int(h * 0.49))
    mask = cv2.ellipse(
        img=mask, center=center, axes=axes, angle=0, startAngle=0,
        endAngle=360, color=(1), thickness=-1)
    mask = cv2.distanceTransform(mask, cv2.DIST_L2, 3)

    maxn = max(w, h) * 0.15
    mask[(mask < 255) & (mask > 0)] = mask[(mask < 255) & (mask > 0)] / maxn
    mask = np.clip(mask, 0, 1)

    return mask.astype(float)
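`padTo16x` rounds image dimensions up to the next multiple of 16 (as the background model requires) and returns the original size for later cropping; a quick check:

```python
import numpy as np

img = np.zeros((719, 405, 3), np.uint8)
padded, h, w = padTo16x(img)
print(padded.shape, (h, w))  # (720, 416, 3) (719, 405)
```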