pull/14/head
menyifang 3 years ago
parent c08e84e872
commit 32c9f31e7b

BIN
.DS_Store vendored

Binary file not shown.

8
.idea/.gitignore vendored

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="Always" serverName="11.160.138.24">
<serverData>
<paths name="11.160.138.24">
<serverdata>
<mappings>
<mapping deploy="/" local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ALWAYS" />
</component>
</project>

@ -0,0 +1,110 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="97">
<item index="0" class="java.lang.String" itemvalue="GPUtil" />
<item index="1" class="java.lang.String" itemvalue="torch" />
<item index="2" class="java.lang.String" itemvalue="torchvision" />
<item index="3" class="java.lang.String" itemvalue="pandas" />
<item index="4" class="java.lang.String" itemvalue="scikit-image" />
<item index="5" class="java.lang.String" itemvalue="scipy" />
<item index="6" class="java.lang.String" itemvalue="opencv-python" />
<item index="7" class="java.lang.String" itemvalue="numpy" />
<item index="8" class="java.lang.String" itemvalue="Pillow" />
<item index="9" class="java.lang.String" itemvalue="protobuf" />
<item index="10" class="java.lang.String" itemvalue="decorator" />
<item index="11" class="java.lang.String" itemvalue="networkx" />
<item index="12" class="java.lang.String" itemvalue="scikit-learn" />
<item index="13" class="java.lang.String" itemvalue="python-dateutil" />
<item index="14" class="java.lang.String" itemvalue="imageio-ffmpeg" />
<item index="15" class="java.lang.String" itemvalue="cloudpickle" />
<item index="16" class="java.lang.String" itemvalue="requests" />
<item index="17" class="java.lang.String" itemvalue="PyWavelets" />
<item index="18" class="java.lang.String" itemvalue="certifi" />
<item index="19" class="java.lang.String" itemvalue="urllib3" />
<item index="20" class="java.lang.String" itemvalue="pyparsing" />
<item index="21" class="java.lang.String" itemvalue="six" />
<item index="22" class="java.lang.String" itemvalue="ffmpeg-python" />
<item index="23" class="java.lang.String" itemvalue="kiwisolver" />
<item index="24" class="java.lang.String" itemvalue="tqdm" />
<item index="25" class="java.lang.String" itemvalue="imageio" />
<item index="26" class="java.lang.String" itemvalue="toolz" />
<item index="27" class="java.lang.String" itemvalue="future" />
<item index="28" class="java.lang.String" itemvalue="matplotlib" />
<item index="29" class="java.lang.String" itemvalue="tensorboardX" />
<item index="30" class="java.lang.String" itemvalue="dask" />
<item index="31" class="java.lang.String" itemvalue="pytz" />
<item index="32" class="java.lang.String" itemvalue="idna" />
<item index="33" class="java.lang.String" itemvalue="PyYAML" />
<item index="34" class="java.lang.String" itemvalue="cffi" />
<item index="35" class="java.lang.String" itemvalue="pycparser" />
<item index="36" class="java.lang.String" itemvalue="pygit" />
<item index="37" class="java.lang.String" itemvalue="Werkzeug" />
<item index="38" class="java.lang.String" itemvalue="blessings" />
<item index="39" class="java.lang.String" itemvalue="wget" />
<item index="40" class="java.lang.String" itemvalue="dominate" />
<item index="41" class="java.lang.String" itemvalue="psutil" />
<item index="42" class="java.lang.String" itemvalue="torchprofile" />
<item index="43" class="java.lang.String" itemvalue="tensorboard" />
<item index="44" class="java.lang.String" itemvalue="grpcio" />
<item index="45" class="java.lang.String" itemvalue="olefile" />
<item index="46" class="java.lang.String" itemvalue="Markdown" />
<item index="47" class="java.lang.String" itemvalue="pycocotools" />
<item index="48" class="java.lang.String" itemvalue="nvidia-ml-py3" />
<item index="49" class="java.lang.String" itemvalue="jedi" />
<item index="50" class="java.lang.String" itemvalue="MNNCV" />
<item index="51" class="java.lang.String" itemvalue="boto3" />
<item index="52" class="java.lang.String" itemvalue="watchdog" />
<item index="53" class="java.lang.String" itemvalue="botocore" />
<item index="54" class="java.lang.String" itemvalue="validators" />
<item index="55" class="java.lang.String" itemvalue="streamlit" />
<item index="56" class="java.lang.String" itemvalue="toml" />
<item index="57" class="java.lang.String" itemvalue="MNN" />
<item index="58" class="java.lang.String" itemvalue="pyrsistent" />
<item index="59" class="java.lang.String" itemvalue="pytorch-fid" />
<item index="60" class="java.lang.String" itemvalue="visdom" />
<item index="61" class="java.lang.String" itemvalue="lpips" />
<item index="62" class="java.lang.String" itemvalue="joblib" />
<item index="63" class="java.lang.String" itemvalue="oss2" />
<item index="64" class="java.lang.String" itemvalue="imgaug" />
<item index="65" class="java.lang.String" itemvalue="opencv_python" />
<item index="66" class="java.lang.String" itemvalue="absl-py" />
<item index="67" class="java.lang.String" itemvalue="wandb" />
<item index="68" class="java.lang.String" itemvalue="opencv-python-headless" />
<item index="69" class="java.lang.String" itemvalue="ninja" />
<item index="70" class="java.lang.String" itemvalue="chardet" />
<item index="71" class="java.lang.String" itemvalue="cycler" />
<item index="72" class="java.lang.String" itemvalue="kornia" />
<item index="73" class="java.lang.String" itemvalue="pytorch-lightning" />
<item index="74" class="java.lang.String" itemvalue="trimesh" />
<item index="75" class="java.lang.String" itemvalue="omegaconf" />
<item index="76" class="java.lang.String" itemvalue="opencv-contrib-python" />
<item index="77" class="java.lang.String" itemvalue="pyglet" />
<item index="78" class="java.lang.String" itemvalue="PyMCubes" />
<item index="79" class="java.lang.String" itemvalue="chumpy" />
<item index="80" class="java.lang.String" itemvalue="plyfile" />
<item index="81" class="java.lang.String" itemvalue="yacs" />
<item index="82" class="java.lang.String" itemvalue="torchmetrics" />
<item index="83" class="java.lang.String" itemvalue="dataclasses" />
<item index="84" class="java.lang.String" itemvalue="test-tube" />
<item index="85" class="java.lang.String" itemvalue="rtree" />
<item index="86" class="java.lang.String" itemvalue="lark-parser" />
<item index="87" class="java.lang.String" itemvalue="PyEXR" />
<item index="88" class="java.lang.String" itemvalue="commentjson" />
<item index="89" class="java.lang.String" itemvalue="pybind11" />
<item index="90" class="java.lang.String" itemvalue="open3d" />
<item index="91" class="java.lang.String" itemvalue="opendr" />
<item index="92" class="java.lang.String" itemvalue="h5py" />
<item index="93" class="java.lang.String" itemvalue="tokenizers" />
<item index="94" class="java.lang.String" itemvalue="transformers" />
<item index="95" class="java.lang.String" itemvalue="yapf" />
<item index="96" class="java.lang.String" itemvalue="addict" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (mnn_python)" project-jdk-type="Python SDK" />
</project>

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/DCT-Net.iml" filepath="$PROJECT_DIR$/.idea/DCT-Net.iml" />
</modules>
</component>
</project>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="WebServers">
<option name="servers">
<webServer id="97e6de3f-73ca-4b14-a177-10b25899d222" name="11.160.138.24" url="http://11.160.138.24">
<fileTransfer rootFolder="/data/qingyao/gitProjects/DCT-Net" accessType="SFTP" host="11.160.138.24" port="22" sshConfigId="80d63b94-77a9-490e-a638-71580f352c36" sshConfig="myf272609@11.160.138.24:22 password">
<advancedOptions>
<advancedOptions dataProtectionLevel="Private" passiveMode="true" shareSSLContext="true" />
</advancedOptions>
</fileTransfer>
</webServer>
</option>
</component>
</project>

@ -19,13 +19,54 @@ Official implementation of DCT-Net for Portrait Stylization.
## News
(2022-07-07) The paper is now available on arXiv (https://arxiv.org/abs/2207.02426).
(2022-08-08) The cartoonization function can now be called directly from the Python SDK of [modelscope](https://modelscope.cn/#/models).
(2022-08-08) The pretrained model and inference code for the 'anime' style are now available. More styles coming soon.
## Requirements
* python 3
* tensorflow (>=1.14)
* easydict
* numpy
* both CPU and GPU are supported
## Quick Start
### From the Python SDK
A quick start using the Python SDK:
- Installation:
```bash
conda create -n dctnet python=3.8
conda activate dctnet
pip install tensorflow
pip install "modelscope[cv]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
```
- Downloads:
```bash
python download.py
```
- Inference:
```bash
python run_sdk.py
```
### From source code
```bash
python run.py
```
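In essence, `run.py` (added later in this commit) loads the downloaded models and stylizes a single image; a minimal sketch, with `input.png` and `res.png` as example paths:
```python
import cv2
from source.cartoonize import Cartoonizer

# build the cartoonizer from the model directory fetched by download.py
algo = Cartoonizer(dataroot='damo/cv_unet_person-image-cartoon_compound-models')
img = cv2.imread('input.png')[..., ::-1]  # BGR -> RGB
result = algo.cartoonize(img)
cv2.imwrite('res.png', result)
```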
## Acknowledgments
Face detector and aligner are adapted from [Peppa_Pig_Face_Engine](https://github.com/610265158/Peppa_Pig_Face_Engine) and [InsightFace](https://github.com/TreB1eN/InsightFace_Pytorch).
## Citation
@ -40,6 +81,13 @@ If you find this code useful for your research, please use the following BibTeX
number={4},
pages={1--9},
year={2022},
publisher={ACM Vancouver, BC, Canada}
publisher={ACM New York, NY, USA}
}
```

Binary file not shown.

Image file changed (13 MiB); preview not shown.

@ -0,0 +1,4 @@
from modelscope.hub.snapshot_download import snapshot_download
model_dir = snapshot_download('damo/cv_unet_person-image-cartoon_compound-models', cache_dir='.')

@ -0,0 +1,23 @@
import cv2
from source.cartoonize import Cartoonizer
import os
def process():
algo = Cartoonizer(dataroot='damo/cv_unet_person-image-cartoon_compound-models')
img = cv2.imread('input.png')[...,::-1]
result = algo.cartoonize(img)
cv2.imwrite('res.png', result)
print('finished!')
if __name__ == '__main__':
process()

@ -0,0 +1,12 @@
import cv2
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
img_cartoon = pipeline(Tasks.image_portrait_stylization, 'damo/cv_unet_person-image-cartoon_compound-models')
# equivalently, the task name alone selects the default model for this task:
# img_cartoon = pipeline('image-portrait-stylization')
result = img_cartoon('input.png')
cv2.imwrite('result.png', result['output_img'])

Binary file not shown.

@ -0,0 +1,120 @@
import os
import cv2
import tensorflow as tf
import numpy as np
from source.facelib.facer import FaceAna
import source.utils as utils
from source.mtcnn_pytorch.src.align_trans import warp_and_crop_face, get_reference_facial_points
if tf.__version__ >= '2.0':
tf = tf.compat.v1
tf.disable_eager_execution()
class Cartoonizer():
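"""Two-stage portrait stylizer.
A frozen background model (cartoon_anime_bg.pb) stylizes the whole image,
while a frozen head model (cartoon_anime_h.pb) stylizes each aligned face
crop; the crops are warped back and alpha-blended over the background result
using the alpha.jpg mask.
"""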
def __init__(self, dataroot):
self.facer = FaceAna(dataroot)
self.sess_head = self.load_sess(
os.path.join(dataroot, 'cartoon_anime_h.pb'), 'model_head')
self.sess_bg = self.load_sess(
os.path.join(dataroot, 'cartoon_anime_bg.pb'), 'model_bg')
self.box_width = 288
global_mask = cv2.imread(os.path.join(dataroot, 'alpha.jpg'))
global_mask = cv2.resize(
global_mask, (self.box_width, self.box_width),
interpolation=cv2.INTER_AREA)
self.global_mask = cv2.cvtColor(
global_mask, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0
def load_sess(self, model_path, name):
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
print(f'loading model from {model_path}')
with tf.gfile.FastGFile(model_path, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
sess.graph.as_default()
tf.import_graph_def(graph_def, name=name)
sess.run(tf.global_variables_initializer())
print(f'load model {model_path} done.')
return sess
def detect_face(self, img):
src_h, src_w, _ = img.shape
src_x = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
boxes, landmarks, _ = self.facer.run(src_x)
if boxes.shape[0] == 0:
return None
else:
return landmarks
def cartoonize(self, img):
# img: RGB input
ori_h, ori_w, _ = img.shape
img = utils.resize_size(img, size=720)
img_brg = img[:, :, ::-1]
# background process
pad_bg, pad_h, pad_w = utils.padTo16x(img_brg)
bg_res = self.sess_bg.run(
self.sess_bg.graph.get_tensor_by_name(
'model_bg/output_image:0'),
feed_dict={'model_bg/input_image:0': pad_bg})
res = bg_res[:pad_h, :pad_w, :]
landmarks = self.detect_face(img_brg)
if landmarks is None:
print('No face detected!')
return res
print('%d faces detected!'%len(landmarks))
for landmark in landmarks:
# get facial 5 points
f5p = utils.get_f5p(landmark, img_brg)
# face alignment
head_img, trans_inv = warp_and_crop_face(
img,
f5p,
ratio=0.75,
reference_pts=get_reference_facial_points(default_square=True),
crop_size=(self.box_width, self.box_width),
return_trans_inv=True)
# head process
head_res = self.sess_head.run(
self.sess_head.graph.get_tensor_by_name(
'model_head/output_image:0'),
feed_dict={
'model_head/input_image:0': head_img[:, :, ::-1]
})
# merge head and background
head_trans_inv = cv2.warpAffine(
head_res,
trans_inv, (np.size(img, 1), np.size(img, 0)),
borderValue=(0, 0, 0))
mask = self.global_mask
mask_trans_inv = cv2.warpAffine(
mask,
trans_inv, (np.size(img, 1), np.size(img, 0)),
borderValue=(0, 0, 0))
mask_trans_inv = np.expand_dims(mask_trans_inv, 2)
res = mask_trans_inv * head_trans_inv + (1 - mask_trans_inv) * res
res = cv2.resize(res, (ori_w, ori_h), interpolation=cv2.INTER_AREA)
return res

@ -0,0 +1,4 @@
Copyright (c) Peppa_Pig_Face_Engine
https://github.com/610265158/Peppa_Pig_Face_Engine

@ -0,0 +1,97 @@
import numpy as np
from modelscope.models.cv.cartoon.facelib.config import config as cfg
class GroupTrack():
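"""Temporal smoothing of landmark sets across frames.
Each new landmark set is matched to the previous frame's sets by the IoU of
their bounding boxes; matched sets are smoothed per point: if the displacement
is below cfg.TRACE.pixel_thres the previous point is kept, otherwise an
exponential moving average weighted by cfg.TRACE.smooth_landmark is applied.
"""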
def __init__(self):
self.old_frame = None
self.previous_landmarks_set = None
self.with_landmark = True
self.thres = cfg.TRACE.pixel_thres
self.alpha = cfg.TRACE.smooth_landmark
self.iou_thres = cfg.TRACE.iou_thres
def calculate(self, img, current_landmarks_set):
if self.previous_landmarks_set is None:
self.previous_landmarks_set = current_landmarks_set
result = current_landmarks_set
else:
previous_lm_num = self.previous_landmarks_set.shape[0]
if previous_lm_num == 0:
self.previous_landmarks_set = current_landmarks_set
result = current_landmarks_set
return result
else:
result = []
for i in range(current_landmarks_set.shape[0]):
not_in_flag = True
for j in range(previous_lm_num):
if self.iou(current_landmarks_set[i],
self.previous_landmarks_set[j]
) > self.iou_thres:
result.append(
self.smooth(current_landmarks_set[i],
self.previous_landmarks_set[j]))
not_in_flag = False
break
if not_in_flag:
result.append(current_landmarks_set[i])
result = np.array(result)
self.previous_landmarks_set = result
return result
def iou(self, p_set0, p_set1):
rec1 = [
np.min(p_set0[:, 0]),
np.min(p_set0[:, 1]),
np.max(p_set0[:, 0]),
np.max(p_set0[:, 1])
]
rec2 = [
np.min(p_set1[:, 0]),
np.min(p_set1[:, 1]),
np.max(p_set1[:, 0]),
np.max(p_set1[:, 1])
]
# computing area of each rectangles
S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
# computing the sum_area
sum_area = S_rec1 + S_rec2
# find the each edge of intersect rectangle
x1 = max(rec1[0], rec2[0])
y1 = max(rec1[1], rec2[1])
x2 = min(rec1[2], rec2[2])
y2 = min(rec1[3], rec2[3])
# judge if there is an intersect
intersect = max(0, x2 - x1) * max(0, y2 - y1)
iou = intersect / (sum_area - intersect)
return iou
def smooth(self, now_landmarks, previous_landmarks):
result = []
for i in range(now_landmarks.shape[0]):
x = now_landmarks[i][0] - previous_landmarks[i][0]
y = now_landmarks[i][1] - previous_landmarks[i][1]
dis = np.sqrt(np.square(x) + np.square(y))
if dis < self.thres:
result.append(previous_landmarks[i])
else:
result.append(
self.do_moving_average(now_landmarks[i],
previous_landmarks[i]))
return np.array(result)
def do_moving_average(self, p_now, p_previous):
p = self.alpha * p_now + (1 - self.alpha) * p_previous
return p

@ -0,0 +1,23 @@
import os
import numpy as np
from easydict import EasyDict as edict
config = edict()
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
config.DETECT = edict()
config.DETECT.topk = 10
config.DETECT.thres = 0.8
config.DETECT.input_shape = (512, 512, 3)
config.KEYPOINTS = edict()
config.KEYPOINTS.p_num = 68
config.KEYPOINTS.base_extend_range = [0.2, 0.3]
config.KEYPOINTS.input_shape = (160, 160, 3)
config.TRACE = edict()
config.TRACE.pixel_thres = 1
config.TRACE.smooth_box = 0.3
config.TRACE.smooth_landmark = 0.95
config.TRACE.iou_thres = 0.5
config.DATA = edict()
config.DATA.pixel_means = np.array([123., 116., 103.]) # RGB

@ -0,0 +1,116 @@
import time
import cv2
import numpy as np
import tensorflow as tf
from .config import config as cfg
if tf.__version__ >= '2.0':
tf = tf.compat.v1
class FaceDetector:
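"""Frozen-graph TensorFlow face detector.
Loads detector.pb, letterboxes the input to cfg.DETECT.input_shape, and
returns an (N, 5) array of [x1, y1, x2, y2, score] boxes in the original
image coordinates, keeping detections with score > cfg.DETECT.thres.
"""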
def __init__(self, dir):
self.model_path = dir + '/detector.pb'
self.thres = cfg.DETECT.thres
self.input_shape = cfg.DETECT.input_shape
self._graph = tf.Graph()
with self._graph.as_default():
self._graph, self._sess = self.init_model(self.model_path)
self.input_image = tf.get_default_graph().get_tensor_by_name(
'tower_0/images:0')
self.training = tf.get_default_graph().get_tensor_by_name(
'training_flag:0')
self.output_ops = [
tf.get_default_graph().get_tensor_by_name('tower_0/boxes:0'),
tf.get_default_graph().get_tensor_by_name('tower_0/scores:0'),
tf.get_default_graph().get_tensor_by_name(
'tower_0/num_detections:0'),
]
def __call__(self, image):
image, scale_x, scale_y = self.preprocess(
image,
target_width=self.input_shape[1],
target_height=self.input_shape[0])
image = np.expand_dims(image, 0)
boxes, scores, num_boxes = self._sess.run(
self.output_ops,
feed_dict={
self.input_image: image,
self.training: False
})
num_boxes = num_boxes[0]
boxes = boxes[0][:num_boxes]
scores = scores[0][:num_boxes]
to_keep = scores > self.thres
boxes = boxes[to_keep]
scores = scores[to_keep]
y1 = self.input_shape[0] / scale_y
x1 = self.input_shape[1] / scale_x
y2 = self.input_shape[0] / scale_y
x2 = self.input_shape[1] / scale_x
scaler = np.array([y1, x1, y2, x2], dtype='float32')
boxes = boxes * scaler
scores = np.expand_dims(scores, 0).reshape([-1, 1])
for i in range(boxes.shape[0]):
boxes[i] = np.array(
[boxes[i][1], boxes[i][0], boxes[i][3], boxes[i][2]])
return np.concatenate([boxes, scores], axis=1)
def preprocess(self, image, target_height, target_width, label=None):
h, w, c = image.shape
bimage = np.zeros(
shape=[target_height, target_width, c],
dtype=image.dtype) + np.array(
cfg.DATA.pixel_means, dtype=image.dtype)
long_side = max(h, w)
scale_x = scale_y = target_height / long_side
image = cv2.resize(image, None, fx=scale_x, fy=scale_y)
h_, w_, _ = image.shape
bimage[:h_, :w_, :] = image
return bimage, scale_x, scale_y
def init_model(self, *args):
pb_path = args[0]
def init_pb(model_path):
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2
compute_graph = tf.Graph()
compute_graph.as_default()
sess = tf.Session(config=config)
with tf.gfile.GFile(model_path, 'rb') as fid:
graph_def = tf.GraphDef()
graph_def.ParseFromString(fid.read())
tf.import_graph_def(graph_def, name='')
return (compute_graph, sess)
model = init_pb(pb_path)
graph = model[0]
sess = model[1]
return graph, sess

@ -0,0 +1,154 @@
import cv2
import numpy as np
import tensorflow as tf
from .config import config as cfg
if tf.__version__ >= '2.0':
tf = tf.compat.v1
class FaceLandmark:
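"""Frozen-graph TensorFlow facial landmark regressor.
Loads keypoints.pb and, for each detected box, crops an extended face region
(cfg.KEYPOINTS.base_extend_range), resizes it to cfg.KEYPOINTS.input_shape and
predicts cfg.KEYPOINTS.p_num landmarks plus a per-face state vector; boxes
whose width and height are both below min_face are skipped.
"""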
def __init__(self, dir):
self.model_path = dir + '/keypoints.pb'
self.min_face = 60
self.keypoint_num = cfg.KEYPOINTS.p_num * 2
self._graph = tf.Graph()
with self._graph.as_default():
self._graph, self._sess = self.init_model(self.model_path)
self.img_input = tf.get_default_graph().get_tensor_by_name(
'tower_0/images:0')
self.embeddings = tf.get_default_graph().get_tensor_by_name(
'tower_0/prediction:0')
self.training = tf.get_default_graph().get_tensor_by_name(
'training_flag:0')
self.landmark = self.embeddings[:, :self.keypoint_num]
self.headpose = self.embeddings[:, -7:-4] * 90.
self.state = tf.nn.sigmoid(self.embeddings[:, -4:])
def __call__(self, img, bboxes):
landmark_result = []
state_result = []
for i, bbox in enumerate(bboxes):
landmark, state = self._one_shot_run(img, bbox, i)
if landmark is not None:
landmark_result.append(landmark)
state_result.append(state)
return np.array(landmark_result), np.array(state_result)
def simple_run(self, cropped_img):
with self._graph.as_default():
cropped_img = np.expand_dims(cropped_img, axis=0)
landmark, p, states = self._sess.run(
[self.landmark, self.headpose, self.state],
feed_dict={
self.img_input: cropped_img,
self.training: False
})
return landmark, states
def _one_shot_run(self, image, bbox, i):
bbox_width = bbox[2] - bbox[0]
bbox_height = bbox[3] - bbox[1]
if (bbox_width <= self.min_face and bbox_height <= self.min_face):
return None, None
add = int(max(bbox_width, bbox_height))
bimg = cv2.copyMakeBorder(
image,
add,
add,
add,
add,
borderType=cv2.BORDER_CONSTANT,
value=cfg.DATA.pixel_means)
bbox += add
one_edge = (1 + 2 * cfg.KEYPOINTS.base_extend_range[0]) * bbox_width
center = [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2]
bbox[0] = center[0] - one_edge // 2
bbox[1] = center[1] - one_edge // 2
bbox[2] = center[0] + one_edge // 2
bbox[3] = center[1] + one_edge // 2
bbox = bbox.astype(np.int32)  # np.int was removed in newer NumPy releases
crop_image = bimg[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
h, w, _ = crop_image.shape
crop_image = cv2.resize(
crop_image,
(cfg.KEYPOINTS.input_shape[1], cfg.KEYPOINTS.input_shape[0]))
crop_image = crop_image.astype(np.float32)
keypoints, state = self.simple_run(crop_image)
res = keypoints[0][:self.keypoint_num].reshape((-1, 2))
res[:, 0] = res[:, 0] * w / cfg.KEYPOINTS.input_shape[1]
res[:, 1] = res[:, 1] * h / cfg.KEYPOINTS.input_shape[0]
landmark = []
for _index in range(res.shape[0]):
x_y = res[_index]
landmark.append([
int(x_y[0] * cfg.KEYPOINTS.input_shape[0] + bbox[0] - add),
int(x_y[1] * cfg.KEYPOINTS.input_shape[1] + bbox[1] - add)
])
landmark = np.array(landmark, np.float32)
return landmark, state
def init_model(self, *args):
if len(args) == 1:
use_pb = True
pb_path = args[0]
else:
use_pb = False
meta_path = args[0]
restore_model_path = args[1]
def ini_ckpt():
graph = tf.Graph()
graph.as_default()
configProto = tf.ConfigProto()
configProto.gpu_options.allow_growth = True
sess = tf.Session(config=configProto)
# load_model(model_path, sess)
saver = tf.train.import_meta_graph(meta_path)
saver.restore(sess, restore_model_path)
print('Model restored!')
return (graph, sess)
def init_pb(model_path):
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2
compute_graph = tf.Graph()
compute_graph.as_default()
sess = tf.Session(config=config)
with tf.gfile.GFile(model_path, 'rb') as fid:
graph_def = tf.GraphDef()
graph_def.ParseFromString(fid.read())
tf.import_graph_def(graph_def, name='')
# saver = tf.train.Saver(tf.global_variables())
# saver.save(sess, save_path='./tmp.ckpt')
return (compute_graph, sess)
if use_pb:
model = init_pb(pb_path)
else:
model = ini_ckpt()
graph = model[0]
sess = model[1]
return graph, sess

@ -0,0 +1,150 @@
import time
import cv2
import numpy as np
from .config import config as cfg
from .face_detector import FaceDetector
from .face_landmark import FaceLandmark
from .LK.lk import GroupTrack
class FaceAna():
'''
by default, only the top-k faces (sorted by area, k = cfg.DETECT.topk) are processed, for speed
'''
def __init__(self, model_dir):
self.face_detector = FaceDetector(model_dir)
self.face_landmark = FaceLandmark(model_dir)
self.trace = GroupTrack()
self.track_box = None
self.previous_image = None
self.previous_box = None
self.diff_thres = 5
self.top_k = cfg.DETECT.topk
self.iou_thres = cfg.TRACE.iou_thres
self.alpha = cfg.TRACE.smooth_box
def run(self, image):
boxes = self.face_detector(image)
if boxes.shape[0] > self.top_k:
boxes = self.sort(boxes)
boxes_return = np.array(boxes)
landmarks, states = self.face_landmark(image, boxes)
if 1:
track = []
for i in range(landmarks.shape[0]):
track.append([
np.min(landmarks[i][:, 0]),
np.min(landmarks[i][:, 1]),
np.max(landmarks[i][:, 0]),
np.max(landmarks[i][:, 1])
])
tmp_box = np.array(track)
self.track_box = self.judge_boxs(boxes_return, tmp_box)
self.track_box, landmarks = self.sort_res(self.track_box, landmarks)
return self.track_box, landmarks, states
def sort_res(self, bboxes, points):
area = []
for bbox in bboxes:
bbox_width = bbox[2] - bbox[0]
bbox_height = bbox[3] - bbox[1]
area.append(bbox_height * bbox_width)
area = np.array(area)
picked = area.argsort()[::-1]
sorted_bboxes = [bboxes[x] for x in picked]
sorted_points = [points[x] for x in picked]
return np.array(sorted_bboxes), np.array(sorted_points)
def diff_frames(self, previous_frame, image):
if previous_frame is None:
return True
else:
_diff = cv2.absdiff(previous_frame, image)
diff = np.sum(
_diff) / previous_frame.shape[0] / previous_frame.shape[1] / 3.
return diff > self.diff_thres
def sort(self, bboxes):
if self.top_k > 100:
return bboxes
area = []
for bbox in bboxes:
bbox_width = bbox[2] - bbox[0]
bbox_height = bbox[3] - bbox[1]
area.append(bbox_height * bbox_width)
area = np.array(area)
picked = area.argsort()[-self.top_k:][::-1]
sorted_bboxes = [bboxes[x] for x in picked]
return np.array(sorted_bboxes)
def judge_boxs(self, previuous_bboxs, now_bboxs):
def iou(rec1, rec2):
# computing area of each rectangles
S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
# computing the sum_area
sum_area = S_rec1 + S_rec2
# find the each edge of intersect rectangle
x1 = max(rec1[0], rec2[0])
y1 = max(rec1[1], rec2[1])
x2 = min(rec1[2], rec2[2])
y2 = min(rec1[3], rec2[3])
# judge if there is an intersect
intersect = max(0, x2 - x1) * max(0, y2 - y1)
return intersect / (sum_area - intersect)
if previuous_bboxs is None:
return now_bboxs
result = []
for i in range(now_bboxs.shape[0]):
contain = False
for j in range(previuous_bboxs.shape[0]):
if iou(now_bboxs[i], previuous_bboxs[j]) > self.iou_thres:
result.append(
self.smooth(now_bboxs[i], previuous_bboxs[j]))
contain = True
break
if not contain:
result.append(now_bboxs[i])
return np.array(result)
def smooth(self, now_box, previous_box):
return self.do_moving_average(now_box[:4], previous_box[:4])
def do_moving_average(self, p_now, p_previous):
p = self.alpha * p_now + (1 - self.alpha) * p_previous
return p
def reset(self):
'''
reset the previous info used for tracking,
:return:
'''
self.track_box = None
self.previous_image = None
self.previous_box = None

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 Dan Antoshchenko
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,26 @@
# MTCNN
`pytorch` implementation of **inference stage** of face detection algorithm described in
[Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878).
## Example
![example of a face detection](images/example.png)
## How to use it
Just download the repository and then do this
```python
from src import detect_faces
from PIL import Image
image = Image.open('image.jpg')
bounding_boxes, landmarks = detect_faces(image)
```
For examples see `test_on_images.ipynb`.
## Requirements
* pytorch 0.2
* Pillow, numpy
## Credit
This implementation is heavily inspired by:
* [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection)

@ -0,0 +1,187 @@
"""
Created on Mon Apr 24 15:43:29 2017
@author: zhaoy
"""
import cv2
import numpy as np
from .matlab_cp2tform import get_similarity_transform_for_cv2
# reference facial points, a list of coordinates (x,y)
dx = 1
dy = 1
REFERENCE_FACIAL_POINTS = [
[30.29459953 + dx, 51.69630051 + dy], # left eye
[65.53179932 + dx, 51.50139999 + dy], # right eye
[48.02519989 + dx, 71.73660278 + dy], # nose
[33.54930115 + dx, 92.3655014 + dy], # left mouth
[62.72990036 + dx, 92.20410156 + dy] # right mouth
]
DEFAULT_CROP_SIZE = (96, 112)
global FACIAL_POINTS
class FaceWarpException(Exception):
def __str__(self):
return 'In File {}:{}'.format(__file__, super().__str__())
def get_reference_facial_points(output_size=None,
inner_padding_factor=0.0,
outer_padding=(0, 0),
default_square=False):
tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
# 0) make the inner region a square
if default_square:
size_diff = max(tmp_crop_size) - tmp_crop_size
tmp_5pts += size_diff / 2
tmp_crop_size += size_diff
h_crop = tmp_crop_size[0]
w_crop = tmp_crop_size[1]
if (output_size):
if (output_size[0] == h_crop and output_size[1] == w_crop):
return tmp_5pts
if (inner_padding_factor == 0 and outer_padding == (0, 0)):
if output_size is None:
return tmp_5pts
else:
raise FaceWarpException(
'No paddings to do, output_size must be None or {}'.format(
tmp_crop_size))
# check output size
if not (0 <= inner_padding_factor <= 1.0):
raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')
factor = inner_padding_factor > 0 or outer_padding[0] > 0
factor = factor or outer_padding[1] > 0
if (factor and output_size is None):
# scale the crop size by the padding factor, then round to int
output_size = (tmp_crop_size * (1 + inner_padding_factor * 2)).astype(np.int32)
output_size += np.array(outer_padding)
cond1 = outer_padding[0] < output_size[0]
cond2 = outer_padding[1] < output_size[1]
if not (cond1 and cond2):
raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
'and outer_padding[1] < output_size[1])')
# 1) pad the inner region according inner_padding_factor
if inner_padding_factor > 0:
size_diff = tmp_crop_size * inner_padding_factor * 2
tmp_5pts += size_diff / 2
tmp_crop_size += np.round(size_diff).astype(np.int32)
# 2) resize the padded inner region
size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[
1] * tmp_crop_size[0]:
raise FaceWarpException(
'Must have (output_size - outer_padding)'
'= some_scale * (crop_size * (1.0 + inner_padding_factor)')
scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
tmp_5pts = tmp_5pts * scale_factor
# 3) add outer_padding to make output_size
reference_5point = tmp_5pts + np.array(outer_padding)
return reference_5point
def get_affine_transform_matrix(src_pts, dst_pts):
tfm = np.float32([[1, 0, 0], [0, 1, 0]])
n_pts = src_pts.shape[0]
ones = np.ones((n_pts, 1), src_pts.dtype)
src_pts_ = np.hstack([src_pts, ones])
dst_pts_ = np.hstack([dst_pts, ones])
A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)
if rank == 3:
tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]],
[A[0, 1], A[1, 1], A[2, 1]]])
elif rank == 2:
tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])
return tfm
def warp_and_crop_face(src_img,
facial_pts,
ratio=0.84,
reference_pts=None,
crop_size=(96, 112),
align_type='similarity',
return_trans_inv=False):
if reference_pts is None:
if crop_size[0] == 96 and crop_size[1] == 112:
reference_pts = REFERENCE_FACIAL_POINTS
else:
default_square = False
inner_padding_factor = 0
outer_padding = (0, 0)
output_size = crop_size
reference_pts = get_reference_facial_points(
output_size, inner_padding_factor, outer_padding,
default_square)
ref_pts = np.float32(reference_pts)
factor = ratio
ref_pts = (ref_pts - 112 / 2) * factor + 112 / 2
ref_pts *= crop_size[0] / 112.
ref_pts_shp = ref_pts.shape
if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
raise FaceWarpException(
'reference_pts.shape must be (K,2) or (2,K) and K>2')
if ref_pts_shp[0] == 2:
ref_pts = ref_pts.T
src_pts = np.float32(facial_pts)
src_pts_shp = src_pts.shape
if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
raise FaceWarpException(
'facial_pts.shape must be (K,2) or (2,K) and K>2')
if src_pts_shp[0] == 2:
src_pts = src_pts.T
if src_pts.shape != ref_pts.shape:
raise FaceWarpException(
'facial_pts and reference_pts must have the same shape')
if align_type == 'cv2_affine':
tfm = cv2.getAffineTransform(src_pts, ref_pts)
tfm_inv = cv2.getAffineTransform(ref_pts, src_pts)
elif align_type == 'affine':
tfm = get_affine_transform_matrix(src_pts, ref_pts)
tfm_inv = get_affine_transform_matrix(ref_pts, src_pts)
else:
tfm, tfm_inv = get_similarity_transform_for_cv2(src_pts, ref_pts)
face_img = cv2.warpAffine(
src_img,
tfm, (crop_size[0], crop_size[1]),
borderValue=(255, 255, 255))
if return_trans_inv:
return face_img, tfm_inv
else:
return face_img

@ -0,0 +1,339 @@
"""
Created on Tue Jul 11 06:54:28 2017
@author: zhaoyafei
"""
import numpy as np
from numpy.linalg import inv, lstsq
from numpy.linalg import matrix_rank as rank
from numpy.linalg import norm
class MatlabCp2tormException(Exception):
def __str__(self):
return 'In File {}:{}'.format(__file__, super().__str__())
def tformfwd(trans, uv):
"""
Function:
----------
apply affine transform 'trans' to uv
Parameters:
----------
@trans: 3x3 np.array
transform matrix
@uv: Kx2 np.array
each row is a pair of coordinates (x, y)
Returns:
----------
@xy: Kx2 np.array
each row is a pair of transformed coordinates (x, y)
"""
uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
xy = np.dot(uv, trans)
xy = xy[:, 0:-1]
return xy
def tforminv(trans, uv):
"""
Function:
----------
apply the inverse of affine transform 'trans' to uv
Parameters:
----------
@trans: 3x3 np.array
transform matrix
@uv: Kx2 np.array
each row is a pair of coordinates (x, y)
Returns:
----------
@xy: Kx2 np.array
each row is a pair of inverse-transformed coordinates (x, y)
"""
Tinv = inv(trans)
xy = tformfwd(Tinv, uv)
return xy
def findNonreflectiveSimilarity(uv, xy, options=None):
options = {'K': 2}
K = options['K']
M = xy.shape[0]
x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
# print('--->x, y:\n', x, y
tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
X = np.vstack((tmp1, tmp2))
# print('--->X.shape: ', X.shape
# print('X:\n', X
u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
U = np.vstack((u, v))
# print('--->U.shape: ', U.shape
# print('U:\n', U
# We know that X * r = U
if rank(X) >= 2 * K:
r, _, _, _ = lstsq(X, U)
r = np.squeeze(r)
else:
raise Exception('cp2tform:twoUniquePointsReq')
# print('--->r:\n', r
sc = r[0]
ss = r[1]
tx = r[2]
ty = r[3]
Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
# print('--->Tinv:\n', Tinv
T = inv(Tinv)
# print('--->T:\n', T
T[:, 2] = np.array([0, 0, 1])
return T, Tinv
def findSimilarity(uv, xy, options=None):
options = {'K': 2}
# uv = np.array(uv)
# xy = np.array(xy)
# Solve for trans1
trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)
# Solve for trans2
# manually reflect the xy data across the Y-axis
xyR = xy.copy()  # copy so the reflection below does not mutate the caller's xy
xyR[:, 0] = -1 * xyR[:, 0]
trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)
# manually reflect the tform to undo the reflection done on xyR
TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
trans2 = np.dot(trans2r, TreflectY)
# Figure out if trans1 or trans2 is better
xy1 = tformfwd(trans1, uv)
norm1 = norm(xy1 - xy)
xy2 = tformfwd(trans2, uv)
norm2 = norm(xy2 - xy)
if norm1 <= norm2:
return trans1, trans1_inv
else:
trans2_inv = inv(trans2)
return trans2, trans2_inv
def get_similarity_transform(src_pts, dst_pts, reflective=True):
"""
Function:
----------
Find Similarity Transform Matrix 'trans':
u = src_pts[:, 0]
v = src_pts[:, 1]
x = dst_pts[:, 0]
y = dst_pts[:, 1]
[x, y, 1] = [u, v, 1] * trans
Parameters:
----------
@src_pts: Kx2 np.array
source points, each row is a pair of coordinates (x, y)
@dst_pts: Kx2 np.array
destination points, each row is a pair of transformed
coordinates (x, y)
@reflective: True or False
if True:
use reflective similarity transform
else:
use non-reflective similarity transform
Returns:
----------
@trans: 3x3 np.array
transform matrix from uv to xy
trans_inv: 3x3 np.array
inverse of trans, transform matrix from xy to uv
"""
if reflective:
trans, trans_inv = findSimilarity(src_pts, dst_pts)
else:
trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)
return trans, trans_inv
def cvt_tform_mat_for_cv2(trans):
"""
Function:
----------
Convert Transform Matrix 'trans' into 'cv2_trans' which could be
directly used by cv2.warpAffine():
u = src_pts[:, 0]
v = src_pts[:, 1]
x = dst_pts[:, 0]
y = dst_pts[:, 1]
[x, y].T = cv_trans * [u, v, 1].T
Parameters:
----------
@trans: 3x3 np.array
transform matrix from uv to xy
Returns:
----------
@cv2_trans: 2x3 np.array
transform matrix from src_pts to dst_pts, could be directly used
for cv2.warpAffine()
"""
cv2_trans = trans[:, 0:2].T
return cv2_trans
def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
"""
Function:
----------
Find Similarity Transform Matrix 'cv2_trans' which could be
directly used by cv2.warpAffine():
u = src_pts[:, 0]
v = src_pts[:, 1]
x = dst_pts[:, 0]
y = dst_pts[:, 1]
[x, y].T = cv_trans * [u, v, 1].T
Parameters:
----------
@src_pts: Kx2 np.array
source points, each row is a pair of coordinates (x, y)
@dst_pts: Kx2 np.array
destination points, each row is a pair of transformed
coordinates (x, y)
reflective: True or False
if True:
use reflective similarity transform
else:
use non-reflective similarity transform
Returns:
----------
@cv2_trans: 2x3 np.array
transform matrix from src_pts to dst_pts, could be directly used
for cv2.warpAffine()
"""
trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
cv2_trans = cvt_tform_mat_for_cv2(trans)
cv2_trans_inv = cvt_tform_mat_for_cv2(trans_inv)
return cv2_trans, cv2_trans_inv
if __name__ == '__main__':
"""
u = [0, 6, -2]
v = [0, 3, 5]
x = [-1, 0, 4]
y = [-1, -10, 4]
# In Matlab, run:
#
# uv = [u'; v'];
# xy = [x'; y'];
# tform_sim=cp2tform(uv,xy,'similarity');
#
# trans = tform_sim.tdata.T
# ans =
# -0.0764 -1.6190 0
# 1.6190 -0.0764 0
# -3.2156 0.0290 1.0000
# trans_inv = tform_sim.tdata.Tinv
# ans =
#
# -0.0291 0.6163 0
# -0.6163 -0.0291 0
# -0.0756 1.9826 1.0000
# xy_m=tformfwd(tform_sim, u,v)
#
# xy_m =
#
# -3.2156 0.0290
# 1.1833 -9.9143
# 5.0323 2.8853
# uv_m=tforminv(tform_sim, x,y)
#
# uv_m =
#
# 0.5698 1.3953
# 6.0872 2.2733
# -2.6570 4.3314
"""
u = [0, 6, -2]
v = [0, 3, 5]
x = [-1, 0, 4]
y = [-1, -10, 4]
uv = np.array((u, v)).T
xy = np.array((x, y)).T
print('\n--->uv:')
print(uv)
print('\n--->xy:')
print(xy)
trans, trans_inv = get_similarity_transform(uv, xy)
print('\n--->trans matrix:')
print(trans)
print('\n--->trans_inv matrix:')
print(trans_inv)
print('\n---> apply transform to uv')
print('\nxy_m = uv_augmented * trans')
uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
xy_m = np.dot(uv_aug, trans)
print(xy_m)
print('\nxy_m = tformfwd(trans, uv)')
xy_m = tformfwd(trans, uv)
print(xy_m)
print('\n---> apply inverse transform to xy')
print('\nuv_m = xy_augmented * trans_inv')
xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
uv_m = np.dot(xy_aug, trans_inv)
print(uv_m)
print('\nuv_m = tformfwd(trans_inv, xy)')
uv_m = tformfwd(trans_inv, xy)
print(uv_m)
uv_m = tforminv(trans, xy)
print('\nuv_m = tforminv(trans, xy)')
print(uv_m)

@ -0,0 +1,107 @@
import os
import cv2
import numpy as np
def resize_size(image, size=720):
h, w, c = np.shape(image)
if min(h, w) > size:
if h > w:
h, w = int(size * h / w), size
else:
h, w = size, int(size * w / h)
image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
return image
def padTo16x(image):
h, w, c = np.shape(image)
if h % 16 == 0 and w % 16 == 0:
return image, h, w
nh, nw = (h // 16 + 1) * 16, (w // 16 + 1) * 16
img_new = np.ones((nh, nw, 3), np.uint8) * 255
img_new[:h, :w, :] = image
return img_new, h, w
def get_f5p(landmarks, np_img):
eye_left = find_pupil(landmarks[36:41], np_img)
eye_right = find_pupil(landmarks[42:47], np_img)
if eye_left is None or eye_right is None:
print('cannot find pupils with find_pupil, using landmark means instead.')
eye_left = landmarks[36:41].mean(axis=0)
eye_right = landmarks[42:47].mean(axis=0)
nose = landmarks[30]
mouth_left = landmarks[48]
mouth_right = landmarks[54]
f5p = [[eye_left[0], eye_left[1]], [eye_right[0], eye_right[1]],
[nose[0], nose[1]], [mouth_left[0], mouth_left[1]],
[mouth_right[0], mouth_right[1]]]
return f5p
def find_pupil(landmarks, np_img):
h, w, _ = np_img.shape
xmax = int(landmarks[:, 0].max())
xmin = int(landmarks[:, 0].min())
ymax = int(landmarks[:, 1].max())
ymin = int(landmarks[:, 1].min())
if ymin >= ymax or xmin >= xmax or ymin < 0 or xmin < 0 or ymax > h or xmax > w:
return None
eye_img_bgr = np_img[ymin:ymax, xmin:xmax, :]
eye_img = cv2.cvtColor(eye_img_bgr, cv2.COLOR_BGR2GRAY)
eye_img = cv2.equalizeHist(eye_img)
n_marks = landmarks - np.array([xmin, ymin]).reshape([1, 2])
eye_mask = cv2.fillConvexPoly(
np.zeros_like(eye_img), n_marks.astype(np.int32), 1)
ret, thresh = cv2.threshold(eye_img, 100, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)
thresh = (1 - thresh / 255.) * eye_mask
cnt = 0
xm = []
ym = []
for i in range(thresh.shape[0]):
for j in range(thresh.shape[1]):
if thresh[i, j] > 0.5:
xm.append(j)
ym.append(i)
cnt += 1
if cnt != 0:
xm.sort()
ym.sort()
xm = xm[cnt // 2]
ym = ym[cnt // 2]
else:
xm = thresh.shape[1] / 2
ym = thresh.shape[0] / 2
return xm + xmin, ym + ymin
def all_file(file_dir):
L = []
for root, dirs, files in os.walk(file_dir):
for file in files:
extend = os.path.splitext(file)[1]
if extend == '.png' or extend == '.jpg' or extend == '.jpeg':
L.append(os.path.join(root, file))
return L
def initialize_mask(box_width):
h, w = [box_width, box_width]
mask = np.zeros((h, w), np.uint8)
center = (int(w / 2), int(h / 2))
axes = (int(w * 0.4), int(h * 0.49))
mask = cv2.ellipse(img=mask, center=center, axes=axes, angle=0, startAngle=0, endAngle=360, color=(1),
thickness=-1)
mask = cv2.distanceTransform(mask, cv2.DIST_L2, 3)
maxn = max(w, h) * 0.15
mask[(mask < 255) & (mask > 0)] = mask[(mask < 255) & (mask > 0)] / maxn
mask = np.clip(mask, 0, 1)
return mask.astype(float)