From 56dc9fa3d2bf85b04b6ce1a52cbd2fb5c9a6d816 Mon Sep 17 00:00:00 2001
From: Anton Tetov <anton@tetov.se>
Date: Wed, 22 Jun 2022 16:08:49 +0200
Subject: [PATCH] Refactor detection pipeline around pathlib and per-image JSON metadata

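Work in progress on the detection pipeline:

- add VS Code launch configurations for the detect, download, location
  and scan entry points
- point the .data files at the dataset under /home/bot/farmbot/farmbot_yolo
- drop the vendored darknet.py ctypes wrapper in favor of importing the
  darknet package directly, and delete the scratch file try.py
- detect.py: switch to pathlib, default paths under REPO/TMPDIR, add a
  --draw flag that writes *.bbox.jpg visualizations, and store detections
  as per-image JSON metadata instead of yolo txt annotations
- location.py: read detections and encoder locations from the per-image
  JSON metadata and write the computed global coordinates back into it
- move.py: log under LOGDIR, store photos under TMPDIR, have scan()
  return the visited points and fetch images with download_images()
- update the measured camera and gripper offsets in static/distance.txt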
---
 .vscode/launch.json          |  39 ++++
 data/vattenhallen.data       |   6 +-
 data/veges.data              |   6 +-
 src/farmbot_yolo/darknet.py  | 341 ----------------------------------
 src/farmbot_yolo/detect.py   | 241 ++++++++++++++++--------
 src/farmbot_yolo/location.py | 344 +++++++++++++++++------------------
 src/farmbot_yolo/main.py     | 193 +++++++++++---------
 src/farmbot_yolo/move.py     | 106 ++++-------
 src/farmbot_yolo/try.py      |  14 --
 static/distance.txt          |   6 +-
 10 files changed, 526 insertions(+), 770 deletions(-)
 create mode 100644 .vscode/launch.json
 delete mode 100644 src/farmbot_yolo/darknet.py
 delete mode 100644 src/farmbot_yolo/try.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..e63ed1d
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,39 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "detect",
+            "type": "python",
+            "request": "launch",
+            "module": "farmbot_yolo.detect",
+            "justMyCode": false,
+            "args": ["--draw"]
+        },
+        {
+            "name": "download",
+            "type": "python",
+            "request": "launch",
+            "module": "farmbot_yolo.download",
+            "justMyCode": false,
+            "args":["tmp/"]
+        },
+        {
+            "name": "location",
+            "type": "python",
+            "request": "launch",
+            "module": "farmbot_yolo.location",
+            "justMyCode": false,
+        },
+        {
+            "name": "scan",
+            "type": "python",
+            "request": "launch",
+            "module": "farmbot_yolo.move",
+            "justMyCode": false,
+            "args": ["-m", "2"]
+        }
+    ]
+}
diff --git a/data/vattenhallen.data b/data/vattenhallen.data
index f6812d7..2f64fda 100644
--- a/data/vattenhallen.data
+++ b/data/vattenhallen.data
@@ -1,6 +1,6 @@
 classes= 7
-train  = ../dataset/train.list
-valid  = ../dataset/test.list
-names = /home/xzleo/farmbot/dataset/classes.txt
+train  = /home/bot/farmbot/farmbot_yolo/dataset/train.list
+valid  = /home/bot/farmbot/farmbot_yolo/dataset/test.list
+names = /home/bot/farmbot/farmbot_yolo/dataset/classes.txt
 backup = backup
 
diff --git a/data/veges.data b/data/veges.data
index 174c19a..4e25f34 100644
--- a/data/veges.data
+++ b/data/veges.data
@@ -1,6 +1,6 @@
 classes= 10
-train  = /home/xzleo/farmbot/dataset/train.txt
-valid  = /home/xzleo/farmbot/dataset/test.txt
-names = /home/xzleo/farmbot/dataset/classes.txt
+train  = /home/bot/farmbot/farmbot_yolo/dataset/train.txt
+valid  = /home/bot/farmbot/farmbot_yolo/dataset/test.txt
+names = /home/bot/farmbot/farmbot_yolo/dataset/classes.txt
 backup = backup
 
diff --git a/src/farmbot_yolo/darknet.py b/src/farmbot_yolo/darknet.py
deleted file mode 100644
index c9c3116..0000000
--- a/src/farmbot_yolo/darknet.py
+++ /dev/null
@@ -1,341 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Don't touch the code!
-Python 3 wrapper for identifying objects in images
-
-Running the script requires opencv-python to be installed (`pip install opencv-python`)
-Directly viewing or returning bounding-boxed images requires scikit-image to be installed (`pip install scikit-image`)
-Use pip3 instead of pip on some systems to be sure to install modules for python3
-"""
-
-from ctypes import *
-import math
-import random
-import os
-
-
-class BOX(Structure):
-    _fields_ = [("x", c_float),
-                ("y", c_float),
-                ("w", c_float),
-                ("h", c_float)]
-
-
-class DETECTION(Structure):
-    _fields_ = [("bbox", BOX),
-                ("classes", c_int),
-                ("best_class_idx", c_int),
-                ("prob", POINTER(c_float)),
-                ("mask", POINTER(c_float)),
-                ("objectness", c_float),
-                ("sort_class", c_int),
-                ("uc", POINTER(c_float)),
-                ("points", c_int),
-                ("embeddings", POINTER(c_float)),
-                ("embedding_size", c_int),
-                ("sim", c_float),
-                ("track_id", c_int)]
-
-class DETNUMPAIR(Structure):
-    _fields_ = [("num", c_int),
-                ("dets", POINTER(DETECTION))]
-
-
-class IMAGE(Structure):
-    _fields_ = [("w", c_int),
-                ("h", c_int),
-                ("c", c_int),
-                ("data", POINTER(c_float))]
-
-
-class METADATA(Structure):
-    _fields_ = [("classes", c_int),
-                ("names", POINTER(c_char_p))]
-
-
-def network_width(net):
-    return lib.network_width(net)
-
-
-def network_height(net):
-    return lib.network_height(net)
-
-
-def bbox2points(bbox):
-    """
-    From bounding box yolo format
-    to corner points cv2 rectangle
-    """
-    x, y, w, h = bbox
-    xmin = int(round(x - (w / 2)))
-    xmax = int(round(x + (w / 2)))
-    ymin = int(round(y - (h / 2)))
-    ymax = int(round(y + (h / 2)))
-    return xmin, ymin, xmax, ymax
-
-
-def class_colors(names):
-    """
-    Create a dict with one random BGR color for each
-    class name
-    """
-    return {name: (
-        random.randint(0, 255),
-        random.randint(0, 255),
-        random.randint(0, 255)) for name in names}
-
-
-def load_network(config_file, data_file, weights, batch_size=1):
-    """
-    load model description and weights from config files
-    args:
-        config_file (str): path to .cfg model file
-        data_file (str): path to .data model file
-        weights (str): path to weights
-    returns:
-        network: trained model
-        class_names
-        class_colors
-    """
-    network = load_net_custom(
-        config_file.encode("ascii"),
-        weights.encode("ascii"), 0, batch_size)
-    metadata = load_meta(data_file.encode("ascii"))
-    class_names = [metadata.names[i].decode("ascii") for i in range(metadata.classes)]
-    colors = class_colors(class_names)
-    return network, class_names, colors
-
-
-def print_detections(detections, coordinates=False):
-    print("\nObjects:")
-    for label, confidence, bbox in detections:
-        x, y, w, h = bbox
-        if coordinates:
-            print("{}: {}%    (left_x: {:.0f}   top_y:  {:.0f}   width:   {:.0f}   height:  {:.0f})".format(label, confidence, x, y, w, h))
-        else:
-            print("{}: {}%".format(label, confidence))
-
-
-def draw_boxes(detections, image, colors):
-    import cv2
-    for label, confidence, bbox in detections:
-        left, top, right, bottom = bbox2points(bbox)
-        cv2.rectangle(image, (left, top), (right, bottom), colors[label], 1)
-        cv2.putText(image, "{} [{:.2f}]".format(label, float(confidence)),
-                    (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
-                    colors[label], 2)
-    return image
-
-
-def decode_detection(detections):
-    decoded = []
-    for label, confidence, bbox in detections:
-        confidence = str(round(confidence * 100, 2))
-        decoded.append((str(label), confidence, bbox))
-    return decoded
-
-# https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
-# Malisiewicz et al.
-def non_max_suppression_fast(detections, overlap_thresh):
-    boxes = []
-    for detection in detections:
-        _, _, _, (x, y, w, h) = detection
-        x1 = x - w / 2
-        y1 = y - h / 2
-        x2 = x + w / 2
-        y2 = y + h / 2
-        boxes.append(np.array([x1, y1, x2, y2]))
-    boxes_array = np.array(boxes)
-
-    # initialize the list of picked indexes
-    pick = []
-    # grab the coordinates of the bounding boxes
-    x1 = boxes_array[:, 0]
-    y1 = boxes_array[:, 1]
-    x2 = boxes_array[:, 2]
-    y2 = boxes_array[:, 3]
-    # compute the area of the bounding boxes and sort the bounding
-    # boxes by the bottom-right y-coordinate of the bounding box
-    area = (x2 - x1 + 1) * (y2 - y1 + 1)
-    idxs = np.argsort(y2)
-    # keep looping while some indexes still remain in the indexes
-    # list
-    while len(idxs) > 0:
-        # grab the last index in the indexes list and add the
-        # index value to the list of picked indexes
-        last = len(idxs) - 1
-        i = idxs[last]
-        pick.append(i)
-        # find the largest (x, y) coordinates for the start of
-        # the bounding box and the smallest (x, y) coordinates
-        # for the end of the bounding box
-        xx1 = np.maximum(x1[i], x1[idxs[:last]])
-        yy1 = np.maximum(y1[i], y1[idxs[:last]])
-        xx2 = np.minimum(x2[i], x2[idxs[:last]])
-        yy2 = np.minimum(y2[i], y2[idxs[:last]])
-        # compute the width and height of the bounding box
-        w = np.maximum(0, xx2 - xx1 + 1)
-        h = np.maximum(0, yy2 - yy1 + 1)
-        # compute the ratio of overlap
-        overlap = (w * h) / area[idxs[:last]]
-        # delete all indexes from the index list that have
-        idxs = np.delete(idxs, np.concatenate(([last],
-                                               np.where(overlap > overlap_thresh)[0])))
-        # return only the bounding boxes that were picked using the
-        # integer data type
-    return [detections[i] for i in pick]
-
-def remove_negatives(detections, class_names, num):
-    """
-    Remove all classes with 0% confidence within the detection
-    """
-    predictions = []
-    for j in range(num):
-        for idx, name in enumerate(class_names):
-            if detections[j].prob[idx] > 0:
-                bbox = detections[j].bbox
-                bbox = (bbox.x, bbox.y, bbox.w, bbox.h)
-                predictions.append((name, detections[j].prob[idx], (bbox)))
-    return predictions
-
-
-def remove_negatives_faster(detections, class_names, num):
-    """
-    Faster version of remove_negatives (very useful when using yolo9000)
-    """
-    predictions = []
-    for j in range(num):
-        if detections[j].best_class_idx == -1:
-            continue
-        name = class_names[detections[j].best_class_idx]
-        bbox = detections[j].bbox
-        bbox = (bbox.x, bbox.y, bbox.w, bbox.h)
-        predictions.append((name, detections[j].prob[detections[j].best_class_idx], bbox))
-    return predictions
-
-
-def detect_image(network,  class_names, image, thresh=.5, hier_thresh=.5, nms=.45): #
-    """
-        Returns a list with highest confidence class and their bbox
-    """
-    pnum = pointer(c_int(0))
-    predict_image(network, image)  # image 需要什么类型
-    detections = get_network_boxes(network, image.w, image.h,
-                                   thresh, hier_thresh, None, 0, pnum, 0)
-    #print_detections(detections, coordinates=True)                 
-    num = pnum[0]
-    if nms:
-        do_nms_sort(detections, num, len(class_names), nms)
-    predictions = remove_negatives(detections, class_names, num)
-    predictions = decode_detection(predictions)
-    free_detections(detections, num)
-    return sorted(predictions, key=lambda x: x[1])
-
-
-if os.name == "posix":
-    cwd = os.path.abspath(os.path.join(os.getcwd(), "..")) # the one in Alexy's repo use current path, changing by our project structure, 
-    lib = CDLL(cwd + "/darknet/libdarknet.so", RTLD_GLOBAL)
-elif os.name == "nt":
-    cwd = os.path.dirname(__file__)
-    os.environ['PATH'] = cwd + ';' + os.environ['PATH']
-    lib = CDLL("darknet.dll", RTLD_GLOBAL)
-else:
-    print("Unsupported OS")
-    exit
-
-lib.network_width.argtypes = [c_void_p]
-lib.network_width.restype = c_int
-lib.network_height.argtypes = [c_void_p]
-lib.network_height.restype = c_int
-
-copy_image_from_bytes = lib.copy_image_from_bytes
-copy_image_from_bytes.argtypes = [IMAGE,c_char_p]
-
-predict = lib.network_predict_ptr
-predict.argtypes = [c_void_p, POINTER(c_float)]
-predict.restype = POINTER(c_float)
-
-set_gpu = lib.cuda_set_device
-init_cpu = lib.init_cpu
-
-make_image = lib.make_image
-make_image.argtypes = [c_int, c_int, c_int]
-make_image.restype = IMAGE
-
-get_network_boxes = lib.get_network_boxes
-get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int), c_int]
-get_network_boxes.restype = POINTER(DETECTION)
-
-make_network_boxes = lib.make_network_boxes
-make_network_boxes.argtypes = [c_void_p]
-make_network_boxes.restype = POINTER(DETECTION)
-
-free_detections = lib.free_detections
-free_detections.argtypes = [POINTER(DETECTION), c_int]
-
-free_batch_detections = lib.free_batch_detections
-free_batch_detections.argtypes = [POINTER(DETNUMPAIR), c_int]
-
-free_ptrs = lib.free_ptrs
-free_ptrs.argtypes = [POINTER(c_void_p), c_int]
-
-network_predict = lib.network_predict_ptr
-network_predict.argtypes = [c_void_p, POINTER(c_float)]
-
-reset_rnn = lib.reset_rnn
-reset_rnn.argtypes = [c_void_p]
-
-load_net = lib.load_network
-load_net.argtypes = [c_char_p, c_char_p, c_int]
-load_net.restype = c_void_p
-
-load_net_custom = lib.load_network_custom
-load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int]
-load_net_custom.restype = c_void_p
-
-free_network_ptr = lib.free_network_ptr
-free_network_ptr.argtypes = [c_void_p]
-free_network_ptr.restype = c_void_p
-
-do_nms_obj = lib.do_nms_obj
-do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
-
-do_nms_sort = lib.do_nms_sort
-do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float]
-
-free_image = lib.free_image
-free_image.argtypes = [IMAGE]
-
-letterbox_image = lib.letterbox_image
-letterbox_image.argtypes = [IMAGE, c_int, c_int]
-letterbox_image.restype = IMAGE
-
-load_meta = lib.get_metadata
-lib.get_metadata.argtypes = [c_char_p]
-lib.get_metadata.restype = METADATA
-
-load_image = lib.load_image_color
-load_image.argtypes = [c_char_p, c_int, c_int]
-load_image.restype = IMAGE
-
-rgbgr_image = lib.rgbgr_image
-rgbgr_image.argtypes = [IMAGE]
-
-predict_image = lib.network_predict_image
-predict_image.argtypes = [c_void_p, IMAGE]
-predict_image.restype = POINTER(c_float)
-
-predict_image_letterbox = lib.network_predict_image_letterbox
-predict_image_letterbox.argtypes = [c_void_p, IMAGE]
-predict_image_letterbox.restype = POINTER(c_float)
-
-network_predict_batch = lib.network_predict_batch
-network_predict_batch.argtypes = [c_void_p, IMAGE, c_int, c_int, c_int,
-                                   c_float, c_float, POINTER(c_int), c_int, c_int]
-network_predict_batch.restype = POINTER(DETNUMPAIR)
-
-if __name__ == "__main__":
-    net = load_network("/home/xzleo/farmbot/darknet/cfg/yolov3-veges-test.cfg", "/home/xzleo/farmbot/darknet/data/veges.data", "/home/xzleo/farmbot/darknet/backup/yolov3-veges_best.weights")
-    img =  load_image(b"/home/xzleo/farmbot/dataset/2_a.png", 0, 0)
-    predictions = detect_image(net, img, thresh=.5, hier_thresh=.5, nms=.45)
\ No newline at end of file
diff --git a/src/farmbot_yolo/detect.py b/src/farmbot_yolo/detect.py
index 5f32963..252b251 100644
--- a/src/farmbot_yolo/detect.py
+++ b/src/farmbot_yolo/detect.py
@@ -1,72 +1,98 @@
-'''
+"""
 load images taken by the camera, return bounding boxes
 
 customized based on Alexy/darknet_images.py
-'''
-from argparse import ArgumentParser, Namespace
-import os
-import glob
+"""
+import json
 import random
-# from typing_extensions import final
-import darknet  # darknet.py
 import time
+from argparse import ArgumentParser
+from argparse import Namespace
+from pathlib import Path
+
 import cv2
-import numpy as np
+from darknet import darknet
+
+from farmbot_yolo import REPO
+from farmbot_yolo import TMPDIR
 
 
 def check_arguments_errors(args):
-    assert 0 < args.thresh < 1, "Threshold should be a float between zero and one (non-inclusive)"
-    if not os.path.exists(args.config_file):
-        raise(ValueError("Invalid config path {}".format(os.path.abspath(args.config_file))))
-    if not os.path.exists(args.weights):
-        raise(ValueError("Invalid weight path {}".format(os.path.abspath(args.weights))))
-    if not os.path.exists(args.data_file):
-        raise(ValueError("Invalid data file path {}".format(os.path.abspath(args.data_file))))
-    if args.input and not os.path.exists(args.input):
-        raise(ValueError("Invalid image path {}".format(os.path.abspath(args.input))))
-
-
-def load_images(images_path):
+    assert (
+        0 < args.thresh < 1
+    ), "Threshold should be a float between zero and one (non-inclusive)"
+    if not args.config_file.exists():
+        raise ValueError(f"Invalid config path {args.config_file}")
+    if not args.weights.exists():
+        raise ValueError(f"Invalid weight path {args.weights}")
+    if not args.data_file.exists():
+        raise ValueError(f"Invalid data file path {args.data_file}")
+    if args.input and not args.input.exists():
+        raise ValueError(f"Invalid image path {args.input}")
+
+
+def load_images(images_path: Path):
     """
     If image path is given, return it directly
     For txt file, read it and return each line as image path
     In other case, it's a folder, return a list with names of each
     jpg, jpeg and png file
     """
-    input_path_extension = images_path.split('.')[-1]
-    if input_path_extension in ['jpg', 'jpeg', 'png']:
+    VALID_IMAGE_EXTS = ("jpg", "jpeg", "png")
+    # Path.suffix includes the leading dot, so strip it before comparing
+    input_path_extension = images_path.suffix.lstrip(".")
+    if input_path_extension in VALID_IMAGE_EXTS:
         # single image
         return [images_path]
-    elif input_path_extension == "txt":
+
+    if input_path_extension == "txt":
         with open(images_path, "r") as f:
             return f.read().splitlines()
-    else:
+
+    if images_path.is_dir():
         # folders
-        return glob.glob(
-            os.path.join(images_path, "*.jpg")) + \
-            glob.glob(os.path.join(images_path, "*.png")) + \
-            glob.glob(os.path.join(images_path, "*.jpeg"))
+        jpgs = list(images_path.glob("*.jpg"))
+        jpegs = list(images_path.glob("*.jpeg"))
+        pngs = list(images_path.glob("*.png"))
+        imgs = jpgs + jpegs + pngs
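+        # skip the *.bbox.jpg visualizations previously written by --draw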
+        return [img for img in imgs if ".bbox" not in img.name]
+
+    raise ValueError(f"Path not recognized as valid input. {images_path}")
 
 
-def image_detection(image_path, network, class_names, class_colors, thresh):
+def image_detection(
+    image_path, network, class_names, class_colors, thresh, draw_bbox=False
+):
     # Darknet doesn't accept numpy images.
     # Create one with image we reuse for each detect
-    # add image.shape as the output 
+    # add image.shape as the output
     width = darknet.network_width(network)
     height = darknet.network_height(network)
     darknet_image = darknet.make_image(width, height, 3)
 
-    image = cv2.imread(image_path)
+    image_path_str = str(image_path)
+    image = cv2.imread(image_path_str)
     image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    image_resized = cv2.resize(image_rgb, (width, height),
-                               interpolation=cv2.INTER_LINEAR)
+    image_resized = cv2.resize(
+        image_rgb, (width, height), interpolation=cv2.INTER_LINEAR
+    )
 
     darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
-    detections = darknet.detect_image(network, class_names, darknet_image, thresh=thresh)
+    detections = darknet.detect_image(
+        network, class_names, darknet_image, thresh=thresh
+    )
     darknet.free_image(darknet_image)
-    resized_image = darknet.draw_boxes(detections, image_resized, class_colors)
 
-    return image.shape, cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB), detections
+    image_w_bboxes = darknet.draw_boxes(detections, image_resized, class_colors)
+
+    # numpy shape is (height, width, channels); cv2.resize takes (width, height)
+    original_height = image.shape[0]
+    original_width = image.shape[1]
+    # restore the original size and aspect ratio
+    image_w_bboxes = cv2.resize(image_w_bboxes, (original_width, original_height))
+
+    # BGR -> RGB
+    image_w_bboxes = cv2.cvtColor(image_w_bboxes, cv2.COLOR_BGR2RGB)
+
+    # detections come back in network-input pixels; rescale them to the
+    # original image so save_annotations can normalize against the returned
+    # (original-sized) image
+    scale_x = original_width / width
+    scale_y = original_height / height
+    detections = [
+        (label, confidence, (x * scale_x, y * scale_y, bw * scale_x, bh * scale_y))
+        for label, confidence, (x, y, bw, bh) in detections
+    ]
+
+    return image.shape, image_w_bboxes, detections
 
 
 def convert2relative(image, bbox):
@@ -75,7 +101,7 @@ def convert2relative(image, bbox):
     """
     x, y, w, h = bbox
     height, width, _ = image.shape
-    return x/width, y/height, w/width, h/height
+    return x / width, y / height, w / width, h / height
 
 
 def save_annotations(original_size, name, image, detections, class_names):
@@ -84,25 +110,49 @@ def save_annotations(original_size, name, image, detections, class_names):
     original_size is Ziliang's improvement
     """
     height, width, _ = original_size
-    img_name = os.path.basename(name)
-    file_name = os.path.splitext(img_name)[0] + ".txt"
-    final_file_name = os.path.dirname(name) + '/annotations/' + file_name
-    with open(final_file_name, "w") as f:
-        for label, confidence, bbox in detections:
-            x, y, w, h = convert2relative(image, bbox)
-            label = class_names.index(label)
-            f.write("{} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}\n".format(label, x*width, y*height, w*width, h*height, float(confidence)))
 
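+    # detections are stored as JSON metadata alongside the image rather than
+    # in yolo-format txt annotation files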
+    annotations = []
+
+    for label, confidence, bbox in detections:
+        x, y, w, h = convert2relative(image, bbox)
+
+        annotations.append(
+            {
+                "label": class_names.index(label),
+                "x": x * width,
+                "y": y * height,
+                "w": w * width,
+                "h": h * height,
+                "confidence": float(confidence),
+            }
+        )
+
+    metadata_file = name.with_suffix(".json")
+
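+    # merge into any existing per-image metadata (presumably farmbot_metadata
+    # written at download time) rather than overwriting the file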
+    if metadata_file.exists() and metadata_file.stat().st_size > 0:
+        with metadata_file.open(mode="r") as fp:
+            img_dict = json.load(fp)
+    else:
+        img_dict = {}
+
+    img_dict["detection_annotations"] = annotations
+
+    with metadata_file.open(mode="w") as fp:
+
+        json.dump(img_dict, fp)
+
+    return metadata_file
 
-def detect(args: Namespace)-> None:
+
+def detect(args: Namespace) -> None:
     check_arguments_errors(args)
 
     random.seed(3)  # deterministic bbox colors
     network, class_names, class_colors = darknet.load_network(
-        args.config_file,
-        args.data_file,
-        args.weights,
-        batch_size=args.batch_size
+        str(args.config_file),
+        str(args.data_file),
+        str(args.weights),
+        batch_size=args.batch_size,
     )
 
     images = load_images(args.input)
@@ -113,43 +163,82 @@ def detect(args: Namespace)-> None:
         if args.input:
             if index >= len(images):
                 break
-            image_name = images[index]
+            img_path = Path(images[index])
         else:
-            image_name = input("Enter Image Path: ")
+            img_path = Path(input("Enter Image Path: "))
         prev_time = time.time()
         original_size, resized_image, detections = image_detection(
-            image_name, network, class_names, class_colors, args.thresh
-            )
+            img_path, network, class_names, class_colors, args.thresh
+        )
         if args.save_labels:
-            save_annotations(original_size, image_name, resized_image, detections, class_names)
+            save_annotations(
+                original_size, img_path, resized_image, detections, class_names
+            )
+        if args.draw:
+            cv2.imwrite(str(img_path.with_suffix(".bbox.jpg")), resized_image)
         darknet.print_detections(detections, args.ext_output)
-        fps = int(1/(time.time() - prev_time))
+        fps = int(1 / (time.time() - prev_time))
         print("FPS: {}".format(fps))
         index += 1
 
 
 if __name__ == "__main__":
     parser = ArgumentParser(description="YOLO Object Detection")
-    parser.add_argument("--input", type=str, default="../img",
-                        help="image source. It can be a single image, a"
-                        "txt with paths to them, or a folder. Image valid"
-                        " formats are jpg, jpeg or png."
-                        "If no input is given, ")
-    parser.add_argument("--batch_size", default=1, type=int,
-                        help="number of images to be processed at the same time")
-    parser.add_argument("--weights", default="../weights/yolov3-vattenhallen_best.weights",
-                        help="yolo weights path")
-    parser.add_argument("--ext_output", action='store_true', default=True,
-                        help="display bbox coordinates of detected objects")
-    parser.add_argument("--save_labels", action='store_true', default=True,
-                        help="save detections bbox for each image in yolo format")
-    parser.add_argument("--config_file", default="../cfg/yolov3-vattenhallen-test.cfg",
-                        help="path to config file")
-    parser.add_argument("--data_file", default="../data/vattenhallen.data",
-                        help="path to data file")
-    parser.add_argument("--thresh", type=float, default=.25,
-                        help="remove detections with lower confidence")
-    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose mode')
+    parser.add_argument(
+        "--input",
+        default=TMPDIR,
+        type=Path,
+        help="image source. It can be a single image, a"
+        "txt with paths to them, or a folder. Image valid"
+        " formats are jpg, jpeg or png."
+        "If no input is given, ",
+    )
+    parser.add_argument(
+        "--batch_size",
+        default=1,
+        type=int,
+        help="number of images to be processed at the same time",
+    )
+    parser.add_argument(
+        "--weights",
+        default=REPO / "weights/yolov3-vattenhallen_best.weights",
+        type=Path,
+        help="yolo weights path",
+    )
+    parser.add_argument(
+        "--ext_output",
+        action="store_true",
+        default=True,
+        help="display bbox coordinates of detected objects",
+    )
+    parser.add_argument(
+        "--save_labels",
+        action="store_true",
+        default=True,
+        help="save detections bbox for each image in yolo format",
+    )
+    parser.add_argument(
+        "--config_file",
+        default=REPO / "cfg/yolov3-vattenhallen-test.cfg",
+        type=Path,
+        help="path to config file",
+    )
+    parser.add_argument(
+        "--data_file",
+        type=Path,
+        default=REPO / "data/vattenhallen.data",
+        help="path to data file",
+    )
+    parser.add_argument(
+        "--thresh",
+        type=float,
+        default=0.25,
+        help="remove detections with lower confidence",
+    )
+    parser.add_argument(
+        "-d", "--draw", action="store_true", help="Save annotated images."
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
     arguments = parser.parse_args()
 
     detect(arguments)
diff --git a/src/farmbot_yolo/location.py b/src/farmbot_yolo/location.py
index a9375b8..2f9e1ef 100644
--- a/src/farmbot_yolo/location.py
+++ b/src/farmbot_yolo/location.py
@@ -1,20 +1,20 @@
-'''
-Author: Ziliang Xiong
-This script loads intrinsci camera matrix, which has been determined by calibration with MATLAB.
-It reads YOLO's bounding boxes and calculate their global 2D coordinate on the planting bed accodring to 
-coordinate transform. See details in README.md 
-'''
+"""Convert coordinates local to photos to global coordinates.
+This script loads intrinsci camera matrix, which has been determined by
+calibration using MATLAB. It reads YOLO's bounding boxes and calculate their
+global 2D coordinate on the planting bed accodring to coordinate transform.
+"""
 
+import json
 from argparse import ArgumentParser, Namespace
-from logging import basicConfig, DEBUG, INFO, getLogger
+from logging import DEBUG, INFO, basicConfig, getLogger
 from pathlib import Path
-from numpy import array, ndarray, dot, squeeze
+from typing import Optional, Tuple
+
+from numpy import array, dot, ndarray, squeeze
 from numpy.linalg import inv
-from os import listdir
-from os.path import join, isfile
 from scipy.io import loadmat
-from typing import Tuple, Optional
 
+from farmbot_yolo import LOGDIR, REPO, TMPDIR
 
 """Logger for log file"""
 _LOG = getLogger(__name__)
@@ -25,234 +25,234 @@ BoundingBox = Tuple[float, float, float, float]
 KMatrix = ndarray(shape=(3, 3))
 
 """Allowed input and output file formats."""
-_CAM_EXTENSIONS = 'mat'
-_ANNOTATION_EXTENSIONS = 'txt'
-_LOCATIONS_EXTENSIONS = 'txt'
-_OFFSET_EXTENSIONS = 'txt'
+_CAM_EXTENSIONS = "mat"
+_ANNOTATION_EXTENSIONS = "txt"
+_LOCATIONS_EXTENSIONS = "txt"
+_OFFSET_EXTENSIONS = "txt"
 
 
 """Constant sweeping height"""
-SWEEP_Z = 575 # change according to the setting, z=-100, height is 57.5cm
+SWEEP_Z = 575  # change according to the setting, z=-100, height is 57.5cm
+
 
+def read_offsets(offset_path: Path) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
+    """Load the file containing gripper and camera offset in relation to farmbot
+    encoder.
 
-def read_offsets(offset_path: Path) -> Optional[Tuple[int, int]]:
-    '''
-    Load the offsets file, for coordinate transformation
+    Parameters
+    ----------
+    offset_path
+        file path
 
-    :param offset_path: file path
-    :return cam_offset: distance of the camera centroid to z axis of Farmbot (dx, dy)
-            gripper_offset: distance of the gripper centroid to z axis of Farmbot (dx, dy)
-    '''
+    Returns
+    -------
+    tuple of tuples of ints, or None on error
+        distance of the camera centroid to the z axis of Farmbot (dx, dy) and
+        distance of the gripper centroid to the z axis of Farmbot (dx, dy)
+    """
     if not offset_path.is_file():
-        _LOG.error('{} is not a file or does not exist'.format(offset_path))
+        _LOG.error("{} is not a file or does not exist".format(offset_path))
         return None
-    
-    if not offset_path.suffix.lstrip('.') in _OFFSET_EXTENSIONS:
-        _LOG.error('{} must have an legal\
-             extension: {}'.format(offset_path, _OFFSET_EXTENSIONS))
+
+    if not offset_path.suffix.lstrip(".") in _OFFSET_EXTENSIONS:
+        _LOG.error(
+            "{} must have an legal\
+             extension: {}".format(
+                offset_path, _OFFSET_EXTENSIONS
+            )
+        )
         return None
 
     try:
-        with open(offset_path, 'r') as f:
+        with open(offset_path, "r") as f:
             offsets = f.readlines()
     except IOError:
-        _LOG.error('Unable to open input file {}.'.format(offset_path))
+        _LOG.error("Unable to open input file {}.".format(offset_path))
         return None
 
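+    # distance.txt layout: header line, camera dx, camera dy, header line,
+    # gripper dx, gripper dy (see static/distance.txt)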
     cam_offset = (int(offsets[1]), int(offsets[2]))
     gripper_offset = (int(offsets[4]), int(offsets[5]))
-    _LOG.info('Load the gripper offset\n{}\n and the camera offset \n{}'.format(gripper_offset, cam_offset))
+    _LOG.info(
+        "Load the gripper offset\n{}\n and the camera offset \n{}".format(
+            gripper_offset, cam_offset
+        )
+    )
     return cam_offset, gripper_offset
 
 
-def load_cam_matrix(cam_path: Path) -> Optional[ndarray]: 
-    '''
-    load the mat file that contains camera calibration result, read the intrinsic matrix of the camera
-    :param cam_path: path of the mat file 
-    :return intrinsic_matrix: K matrix of the camera
-    '''
-    if not cam_path.suffix.lstrip('.') == _CAM_EXTENSIONS:
-        _LOG.error('{} has an illegal extension'.format(cam_path))
-        return None
-
-    try:
-        data = loadmat(cam_path)
-    except FileNotFoundError:
-        _LOG.error(' No such file')
-        return None
-        
-    intrinsic_matrix = data['camera_no_distortion'][0, 0][11] 
-    _LOG.info('Load intrinsic_matrix of the camera \n{}'.format(intrinsic_matrix))
-    return intrinsic_matrix
+def load_cam_matrix(cam_path: Path) -> Optional[ndarray]:
+    """Load the mat file that contains camera calibration result.
 
+    Parameters
+    ----------
+    cam_path
+        path of the mat file
 
-def read_locations(locations_path: Path) -> Optional[ndarray]:
-    '''
-    read the locations of farmbot that corresponds to each photo
-
-    param: locations_path: the path of folder locations
-    return: list that contains locations
-    '''
-    if not locations_path.is_dir():
-        _LOG.error('{} is not a directory or does not exist'.format(locations_path))
-        return None
-
-    number_files = len(listdir(locations_path))
-    if number_files != 1:
-        _LOG.error('More than one file of locations found the {}'.format(locations_path))
-        return None
-
-    locations_file = Path(locations_path, [file for file in listdir(locations_path)][0])    
-    if not locations_file.suffix.lstrip('.') in _LOCATIONS_EXTENSIONS:
-        _LOG.error('{} must have an legal\
-             extension: {}'.format(locations_path, _LOCATIONS_EXTENSIONS))
+    Returns
+    -------
+    intrinsic_matrix
+        K matrix of the camera
+    """
+    if not cam_path.suffix.lstrip(".") == _CAM_EXTENSIONS:
+        _LOG.error("{} has an illegal extension".format(cam_path))
         return None
 
     try:
-        with open(locations_file, 'r') as f:
-            locations = f.readlines()
-    except IOError:
-        _LOG.error('Unable to open input file {}.'.format(locations_path))
+        data = loadmat(cam_path)
+    except FileNotFoundError:
+        _LOG.error(" No such file")
         return None
 
-    list_location = []
-    for location in locations:
-        X, Y, Z = location.split()
-        list_location.append([int(X), int(Y), int(Z)])  # integer???
-
-    _LOG.info('Load all the locations \n {}'.format(list_location))
-    return array(list_location)
+    intrinsic_matrix = data["camera_no_distortion"][0, 0][11]
+    _LOG.info("Load intrinsic_matrix of the camera \n{}".format(intrinsic_matrix))
+    return intrinsic_matrix
 
 
 def cam_coordinate(pixel_x: int, pixel_y: int, cam_matrix) -> Tuple[float, float]:
-    '''
+    """
     Project one object's pixel coordinate into  the camera coordinate system
 
     Input: detection: a bounding box <x, y, w, h>
            inner_matrix: matrix K that contains focal length and other inner parameters
     Output: object's centroid location in camera coordinate
-    '''
-    normalized_coordinate = dot(inv(cam_matrix.transpose()), array([pixel_x, pixel_y, 1], dtype=float).reshape((3, 1)))
+    """
+    normalized_coordinate = dot(
+        inv(cam_matrix.transpose()),
+        array([pixel_x, pixel_y, 1], dtype=float).reshape((3, 1)),
+    )
     camera_coordinate = squeeze(normalized_coordinate)
     ratio = float(SWEEP_Z / camera_coordinate[2])
-    local_position = (ratio*camera_coordinate[0], ratio*camera_coordinate[1])
-    _LOG.debug('Transfer from pixel coordinate to Camera coordinate. \n {}'.format(local_position))
+    local_position = (ratio * camera_coordinate[0], ratio * camera_coordinate[1])
+    _LOG.debug(
+        "Transfer from pixel coordinate to Camera coordinate. \n {}".format(
+            local_position
+        )
+    )
     return local_position
 
 
-def global_coordinate(cam_coordinate: Tuple[float, float], 
-                      cam_location: Tuple[float, float, float], 
-                      cam_offset: Tuple[int, int], 
-                      gripper_offset: Tuple[int, int]) -> Tuple[float, float]:
-    '''
-    Calculate an object's locaiton in the globale coordinate(see definition in README.md) 
-    by coordinate transform. 
+def global_coordinate(
+    coords_camera: Tuple[float, float],
+    coords_encoders: Tuple[float, float, float],
+    offset_camera: Tuple[float, float],
+    offset_gripper: Tuple[float, float],
+) -> Tuple[float, float]:
+    """Calculate an object's location in global coordinates.
+
+    Parameters
+    ----------
+
+    coords_camera
+        object's centroid location in camera coordinates
+    coords_encoders
+        camera's location reading from the encoder
+    offset_camera
+        Offset between camera centroid and Z-axis of Farmbot (dx, dy)
+    offset_gripper
+        Offset between gripper and Z-axis of Farmbot (dx, dy)
+
+    Returns
+    -------
+    tuple of floats
+        x & y coordinates in global coordinate frame
+    """
+    x_camera, y_camera = coords_camera
+    x_encoders, y_encoders = coords_encoders
+
+    x_delta_camera_gripper = 150
+    y_delta_camera_gripper = 15
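+    # nb: offset_camera and offset_gripper are currently unused; the hardcoded
+    # deltas above stand in until the transform below is verified (see TODO)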
 
-    Input: cam_coordinate: object's centroid location in camera coordinate
-           cam_location: camera's location reading from the encoder <x, y, z>
-           cam_offset: cam_offset: distance of the camera centroid to z axis of Farmbot (dx, dy)
-           gripper_offset: distance of the gripper centroid to z axis of Farmbot (dx, dy)
-    Output: global location of a box
-    '''
-    global_x = -cam_coordinate[1] + cam_location[0] + cam_offset[0] + gripper_offset[0]
-    global_y = cam_coordinate[0] + cam_location[1] + cam_offset[1] + gripper_offset[1]
-    return (global_x, global_y)
+    # TODO: Double check
+    x_global = x_camera + x_encoders + x_delta_camera_gripper
+    y_global = y_camera + y_encoders + y_delta_camera_gripper
+    #          411        300          45          0
+
+    return x_global, y_global
 
 
 def cal_location(args: Namespace) -> ndarray:
-    '''
+    """
     main function for this script
-    '''
+    """
     cam_offset, gripper_offset = read_offsets(args.offset)
     K_matrix = load_cam_matrix(args.camera_matrix)
-    list_location = read_locations(args.locations) 
-    # iterate over each annotation file
-    _LOG.info('Global coordinate calculation begins.')
-    list_annotations = listdir(args.annotations)
-    # sort by chronological order  / specific for the filename on Ziliang's PC, change if other names
-    list_annotations.sort() 
+
+    _LOG.info("Global coordinate calculation begins.")
+
+    img_metadata_files = args.input_dir.glob("*.json")
+
     # read annotations
-    for index_photo, annotation_file in enumerate(list_annotations):
-        filepath = Path(args.annotations, annotation_file)
-
-        if not isfile(filepath):
-            _LOG.error('{} is not a file or does not exist'.format(annotation_file))
-            return None
-
-        if not filepath.suffix.lstrip('.') in _ANNOTATION_EXTENSIONS:
-            _LOG.error('{} must have an legal\
-            extension: {}'.format(filepath, _ANNOTATION_EXTENSIONS))
-            return None
-
-        try:
-            with open(filepath, 'r') as f:
-                annotations = f.readlines()
-        except IOError:
-            _LOG.error('Unable to open input file {}.'.format(filepath))
-            return None
-        _LOG.debug('Load annotation {}'.format(annotations))
-
-        list_global_coordinate = []
+    for img_metadata_file in img_metadata_files:
+        with img_metadata_file.open(mode="r") as fp:
+            img_metadata = json.load(fp)
+
+        annotations = img_metadata["detection_annotations"]
+        farmbot_metadata = img_metadata["farmbot_metadata"]
+
+        x_encoders = farmbot_metadata["location"]["x"]
+        y_encoders = farmbot_metadata["location"]["y"]
+
+        _LOG.debug(f"Loaded detection annotations: {annotations}")
+
+        updated_annotations = []
         for annotation in annotations:
-            detection = annotation.split()
-            # read the center_x center_y and class
-            center_x = detection[1]
-            center_y = detection[2]
-            category = detection[0]
-            confidence = detection[5]
+            x_center = annotation["x"]
+            y_center = annotation["y"]
+
             # pixel coordinate to camera coordinate
-            local_coordinate = cam_coordinate(center_x, center_y, K_matrix)
-            print(local_coordinate)
+            x_local, y_local = cam_coordinate(x_center, y_center, K_matrix)
 
             # camera coordinate to global coordinate
-            global_x, global_y = global_coordinate(local_coordinate, 
-                                list_location[index_photo], cam_offset, gripper_offset)
-            list_global_coordinate.append([category, global_x, global_y, confidence])     
-            _LOG.debug(list_global_coordinate[-1])   
-    
-    _LOG.info('Global coordinate calculation is done.')
-    return array(list_global_coordinate)
+            global_x, global_y = global_coordinate(
+                (x_local, y_local), (x_encoders, y_encoders), cam_offset, gripper_offset
+            )
+            annotation["global_x"] = global_x
+            annotation["global_y"] = global_y
 
+            updated_annotations.append(annotation)
 
-if __name__ == '__main__':
-    parser = ArgumentParser(description='Transfer bounding boxes to real world coordinates')
-    parser.add_argument(
-        '-cam',
-        '--camera_matrix',
-        type=Path,
-        default='../static/camera_no_distortion.mat',
-        help='Path to mat file that contains intrinsic camera matrix K'
+        # the list of annotation dicts is added to the main metadata dict and
+        # written back to the json file
+        img_metadata["detection_annotations"] = updated_annotations
+
+        with img_metadata_file.open(mode="w") as fp:
+            json.dump(img_metadata, fp)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+        description="Transfer bounding boxes to real world coordinates"
     )
     parser.add_argument(
-        '-loc',
-        '--locations',
+        "-d",
+        "--input_dir",
         type=Path,
-        default='../img/locations/',
-        help='the path to txt files contains locations from encoders corresponds to each photo'
+        default=TMPDIR,
+        help="Directory with images and metadata.",
     )
     parser.add_argument(
-        '-a',
-        '--annotations',
+        "-cam",
+        "--camera_matrix",
         type=Path,
-        default='../img/annotations',
-        help='the path to txt files contains annotations for each photo'
+        default=REPO / "static/calibration_20220614.mat",
+        help="Path to mat file that contains intrinsic camera matrix K",
     )
     parser.add_argument(
-        '-o',
-        '--offset',
+        "-o",
+        "--offset",
         type=Path,
-        default='../static/distance.txt',
-        help='the txt contains distance offset for camera and gripper'
+        default=REPO / "static/distance.txt",
+        help="the txt contains distance offset for camera and gripper",
     )
     parser.add_argument(
-        '-l',
-        '--log',
+        "-l",
+        "--log",
         type=Path,
-        default='../log/location.log',
-        help='Path to the log file'
+        default=LOGDIR / "location.log",
+        help="Path to the log file",
     )
 
-    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose mode')
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
     arguments = parser.parse_args()
 
     if arguments.verbose:
@@ -261,5 +261,3 @@ if __name__ == '__main__':
         basicConfig(filename=arguments.log, level=INFO)
 
     cal_location(arguments)
-
-    
\ No newline at end of file
diff --git a/src/farmbot_yolo/main.py b/src/farmbot_yolo/main.py
index 5295b4e..e187136 100644
--- a/src/farmbot_yolo/main.py
+++ b/src/farmbot_yolo/main.py
@@ -1,8 +1,8 @@
-'''
+"""
 Author: Ziliang
 The main script of the project, it calls scripts for movement, detection, and coordinate calculation.
 
-'''
+"""
 from argparse import ArgumentParser, Namespace
 from logging import StringTemplateStyle
 from os import listdir, remove
@@ -19,41 +19,45 @@ from farmbot_yolo.location import *
 
 _LOG = getLogger(__name__)
 
-GRIP_Z = 468 # measure!
+GRIP_Z = 468  # measure!
 SCAN_Z = 0
 ORIGIN_X = 0
 ORIGIN_Y = 0
 ORIGIN_Z = 0
 
-def remove_overlap(table_coordinate:DataFrame, tolerance=50.00)->DataFrame:
-    '''
+
+def remove_overlap(table_coordinate: DataFrame, tolerance=50.00) -> DataFrame:
+    """
     compare every two coordinates, if their Euclidean distance is smaller than tolerance
     , delete the one with lower probability
 
     Choose a reasonable tolerance!!
     :param table_coordinate: pandas dataframe that each row corresponds to a target [class, x, y, confidence]
     :param tolerance: a distance threshold
-    '''
+    """
     num_coordinates, num_col = table_coordinate.shape
-    for i in range(num_coordinates-1):
-        x, y, confidence = table_coordinate.loc[i, ['x','y', 'confidence']]
-        for j in range(i+1, num_coordinates):
-                x_j, y_j, confidence_j = table_coordinate.loc[j, ['x','y', 'confidence']]
-                distance = sqrt((float(x)-float(x_j))*(float(x)-float(x_j)) + (float(y)-float(y_j))*(float(y)-float(y_j)))  
-                if distance <= tolerance:
-                    if confidence < confidence_j:
-                        table_coordinate.drop(i)
-                    else:
-                        table_coordinate.drop(j)
-    return table_coordinate              
+    # DataFrame.drop returns a copy, so collect indices and drop them once at
+    # the end rather than discarding drop()'s result inside the loop
+    to_drop = set()
+    for i in range(num_coordinates - 1):
+        x, y, confidence = table_coordinate.loc[i, ["x", "y", "confidence"]]
+        for j in range(i + 1, num_coordinates):
+            x_j, y_j, confidence_j = table_coordinate.loc[j, ["x", "y", "confidence"]]
+            distance = sqrt(
+                (float(x) - float(x_j)) ** 2 + (float(y) - float(y_j)) ** 2
+            )
+            if distance <= tolerance:
+                if confidence < confidence_j:
+                    to_drop.add(i)
+                else:
+                    to_drop.add(j)
+    return table_coordinate.drop(index=list(to_drop))
 
 
-def remove_temp(path: Path)-> None:
-    '''
+def remove_temp(path: Path) -> None:
+    """
     Clean temporary files, i.e., photos, location.txt, annotations
-    '''
+    """
     for filename in listdir(path):
-        file =Path(join(path, filename))
+        file = Path(join(path, filename))
         if file.is_file():
             remove(file)
     return
@@ -77,11 +81,15 @@ def main(args: Namespace):
     list_global_coordinate = cal_location(args)
     _LOG.info("Global coordinate calculation is done.")
     # choose class
-    table_global_coordinate = DataFrame(list_global_coordinate, columns=['class', 'x', 'y', 'confidence'])
+    table_global_coordinate = DataFrame(
+        list_global_coordinate, columns=["class", "x", "y", "confidence"]
+    )
     # remove overlap
     print(table_global_coordinate)
     table_global_coordinate = remove_overlap(table_global_coordinate)
-    goal_class = table_global_coordinate[table_global_coordinate['class']==args.category]
+    goal_class = table_global_coordinate[
+        table_global_coordinate["class"] == args.category
+    ]
     _LOG.info("Choose {}".format(args.category))
     # if there is no desired class of plants
     if goal_class.empty:
@@ -89,10 +97,10 @@ def main(args: Namespace):
     # move and grip
     num_goals, num_col = goal_class.shape
     for i in range(num_goals):
-        x, y = goal_class.loc[i, ['x','y']]
+        x, y = goal_class.loc[i, ["x", "y"]]
         simple_move(x, y, GRIP_Z, False)
         open()
-        gripper_open() # to make sure the gripper is open before gripping
+        gripper_open()  # to make sure the gripper is open before gripping
         gripper_close()
         # go back to the orgin
         simple_move(x, y, GRIP_Z, False)
@@ -100,87 +108,108 @@ def main(args: Namespace):
     return
 
 
-    
-
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = ArgumentParser(description="YOLOv3 detection on Farmbot")
     # parsers for move
     parser.add_argument(
-        '-p',
-        '--photo',
+        "-p",
+        "--photo",
         type=Path,
         default="../img",
-        help='Mode for FarmBot, 1 for simple move with an assigned detination, 2 for scaning' 
+        help="Mode for FarmBot, 1 for simple move with an assigned detination, 2 for scaning",
     )
     # parsers for detect
-    parser.add_argument("--input", type=str, default="../img",
-                        help="image source. It can be a single image, a"
-                        "txt with paths to them, or a folder. Image valid"
-                        " formats are jpg, jpeg or png."
-                        "If no input is given, ")
-    parser.add_argument("--batch_size", default=1, type=int,
-                        help="number of images to be processed at the same time")
-    parser.add_argument("--weights", default="../weights/yolov3-vattenhallen_best.weights",
-                        help="yolo weights path")
-    parser.add_argument("--ext_output", action='store_true', default=True,
-                        help="display bbox coordinates of detected objects")
-    parser.add_argument("--save_labels", action='store_true', default=True,
-                        help="save detections bbox for each image in yolo format")
-    parser.add_argument("--config_file", default="../cfg/yolov3-vattenhallen-test.cfg",
-                        help="path to config file")
-    parser.add_argument("--data_file", default="../data/vattenhallen.data",
-                        help="path to data file")
-    parser.add_argument("--thresh", type=float, default=.25,
-                        help="remove detections with lower confidence")
+    parser.add_argument(
+        "--input",
+        type=str,
+        default="../img",
+        help="image source. It can be a single image, a"
+        "txt with paths to them, or a folder. Image valid"
+        " formats are jpg, jpeg or png."
+        "If no input is given, ",
+    )
+    parser.add_argument(
+        "--batch_size",
+        default=1,
+        type=int,
+        help="number of images to be processed at the same time",
+    )
+    parser.add_argument(
+        "--weights",
+        default="../weights/yolov3-vattenhallen_best.weights",
+        help="yolo weights path",
+    )
+    parser.add_argument(
+        "--ext_output",
+        action="store_true",
+        default=True,
+        help="display bbox coordinates of detected objects",
+    )
+    parser.add_argument(
+        "--save_labels",
+        action="store_true",
+        default=True,
+        help="save detections bbox for each image in yolo format",
+    )
+    parser.add_argument(
+        "--config_file",
+        default="../cfg/yolov3-vattenhallen-test.cfg",
+        help="path to config file",
+    )
+    parser.add_argument(
+        "--data_file", default="../data/vattenhallen.data", help="path to data file"
+    )
+    parser.add_argument(
+        "--thresh",
+        type=float,
+        default=0.25,
+        help="remove detections with lower confidence",
+    )
     # arguemtns for grip
     parser.add_argument(
-        '-ca',
-        '--category',
+        "-ca",
+        "--category",
         type=int,
-        help='Choose the class of fruits to be picked up. There are tomato, mushroom,\
-        potato, carrot, beetroot, zucchini, hand'
+        help="Choose the class of fruits to be picked up. There are tomato, mushroom,\
+        potato, carrot, beetroot, zucchini, hand",
     )
     # arguments for location
     parser.add_argument(
-        '-cam',
-        '--camera_matrix',
+        "-cam",
+        "--camera_matrix",
         type=Path,
-        default='../static/camera_no_distortion.mat',
-        help='Path to mat file that contains intrinsic camera matrix K'
+        default="../static/camera_no_distortion.mat",
+        help="Path to mat file that contains intrinsic camera matrix K",
     )
     parser.add_argument(
-        '-loc',
-        '--locations',
+        "-loc",
+        "--locations",
         type=Path,
-        default='../img/locations/',
-        help='the path to txt files contains locations from encoders corresponds to each photo'
+        default="../img/locations/",
+        help="the path to txt files contains locations from encoders corresponds to each photo",
     )
     parser.add_argument(
-        '-a',
-        '--annotations',
+        "-a",
+        "--annotations",
         type=Path,
-        default='../img/annotations',
-        help='the path to txt files contains annotations for each photo'
+        default="../img/annotations",
+        help="the path to txt files contains annotations for each photo",
     )
     parser.add_argument(
-        '-o',
-        '--offset',
+        "-o",
+        "--offset",
         type=Path,
-        default='../static/distance.txt',
-        help='the txt contains distance offset for camera and gripper'
+        default="../static/distance.txt",
+        help="the txt contains distance offset for camera and gripper",
     )
     parser.add_argument(
-        '-l',
-        '--log',
-        type=Path,
-        default='../log/main.log',
-        help='Path to the log file'
-    )    
-    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose mode.')
-    arguments = parser.parse_args()
+        "-l", "--log", type=Path, default="../log/main.log", help="Path to the log file"
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode.")
+    args = parser.parse_args()
 
-    if arguments.verbose:
-        basicConfig(filename=arguments.log, level=DEBUG)
+    if args.verbose:
+        basicConfig(filename=args.log, level=DEBUG)
     else:
-        basicConfig(filename=arguments.log, level=INFO)
-    main(arguments)
+        basicConfig(filename=args.log, level=INFO)
+    main(args)
diff --git a/src/farmbot_yolo/move.py b/src/farmbot_yolo/move.py
index 064cf55..f44058d 100644
--- a/src/farmbot_yolo/move.py
+++ b/src/farmbot_yolo/move.py
@@ -21,37 +21,27 @@ from urllib import request
 from datetime import timezone, datetime
 from dateutil.parser import parse
 from requests import get, delete
+from farmbot_yolo import LOGDIR, TMPDIR
 
-import creds
-from client import FarmbotClient
+from farmbot_yolo import creds
+import farmbot_yolo
+from farmbot_yolo.client import FarmbotClient
+from farmbot_yolo.download import download_images
 
 _SWEEEP_HEIGHT = 0
 
-Logger = getLogger(__name__)
-
-
-class Opts:
-    def __init__(self, min_x, max_x, min_y, max_y, delta, offset, flag):
-        self.min_x = min_x
-        self.max_x = max_x
-        self.min_y = min_y
-        self.max_y = max_y
-        self.delta = delta
-        self.offset = offset
-        self.flag = flag
+log = getLogger(__name__)
 
 
 def scan(
-    img_path: Path,
-    location_path: Path,  # smaller delta
+    img_dir: Path,
     min_x=0,
     max_x=1175,
     min_y=0,
     max_y=974,
     delta=300,
     offset=0,
-    flag=True,
-) -> List:  # 里面的数字需要重新测量
+) -> List:
     """
     scan the bed at a certain height, first move along x axis, then y, like a zig zag;
     Taking pictures and record the location of the camera that corresponds to the picture
@@ -62,57 +52,28 @@ def scan(
            max_y: back most point on y axis
            delta: the interval for scanning
            offset:
-           flag: for degging, if true, don't actually drive FarmBot
-    Output: none
     """
-    opts = Opts(min_x, max_x, min_y, max_y, delta, offset, flag)
-    creds = read_credentials()
-
     pts = []
     sweep_y_negative = False
-    for x in range(opts.min_x, opts.max_x, opts.delta):
-        y_range = range(opts.min_y, opts.max_y, opts.delta)
+    for x in range(min_x, max_x, delta):
+        y_range = range(min_y, max_y, delta)
         if sweep_y_negative:
             y_range = reversed(y_range)
         sweep_y_negative = not sweep_y_negative
         for y in y_range:
-            pts.append((x + opts.offset, y + opts.offset))
-
-    Logger.info("Moving pattern generated")
+            pts.append((x + offset, y + offset))
 
-    if opts.flag:
-        Logger.info("Run without sweep")
-        exit()
+    log.info("Moving pattern generated")
 
-    client = FarmbotClient(creds["device_id"], creds["token"])
+    client = FarmbotClient(creds.device_id, creds.token)
     client.move(0, 0, _SWEEEP_HEIGHT)  # ensure moving from original
     for x, y in pts:
         client.move(x, y, _SWEEEP_HEIGHT)  # move camera
         # take_photo(img_path)
         client.take_photo()
     client.shutdown()
-    # write to img/location
-    with open(path.join(location_path, "location.txt"), "w") as f:
-        for postion in pts:
-            f.write("{} {} {}\n".format(postion[0], postion[1], _SWEEEP_HEIGHT))
-    return None
 
-
-def take_photo():
-    client = FarmbotClient(creds.device_id, creds.token)
-    client.take_photo()
-    # download image
-    system("python ./utils/download.py")
-
-
-# def take_photo(img_path: Path):
-#     HERE = path.dirname(__file__)
-#     IMG_DIR = path.join(HERE, img_path)
-
-#     with request.urlopen('http://localhost:8080/?action=snapshot') as photo:
-#         filename = datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + ".jpg"
-#         with open(path.join(IMG_DIR, filename), mode="wb") as save_file:
-#             save_file.write(photo.read())
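+    # the visited points are returned instead of being written to a
+    # location.txt; per-image locations are assumed to travel with the JSON
+    # metadata fetched by download_images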
+    return pts
 
 
 def simple_move(x: int, y: int, z: int) -> None:
@@ -134,40 +95,35 @@ if __name__ == "__main__":
         "-m",
         "--mode",
         type=int,
-        help="Mode for FarmBot, 1 for simple move with an assigned detination, 2 for scaning",
+        help="Mode for FarmBot, 1 for simple move with an assigned detination, 2 for scanning",
     )
     parser.add_argument(
-        "-l", "--log", type=Path, default="../log/move.log", help="Path to the log file"
-    )
-    parser.add_argument(
-        "-p",
-        "--photo",
+        "-l",
+        "--log",
         type=Path,
-        default="../img",
-        help="Mode for FarmBot, 1 for simple move with an assigned detination, 2 for scaning",
+        default=LOGDIR / "move.log",
+        help="Path to the log file",
     )
     parser.add_argument(
-        "-loc",
-        "--locations",
+        "-p",
+        "--photo_dir",
         type=Path,
-        default="../img/locations/",
-        help="the path to txt files contains locations from encoders corresponds to each photo",
+        default=TMPDIR,
+        help="Directory to store photos.",
     )
     parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
-    arguments = parser.parse_args()
+    args = parser.parse_args()
 
-    if arguments.mode == 1:
-        Logger.info("Input the destination:")
+    if args.mode == 1:
+        log.info("Input the destination:")
         destination_x = int(input("X:"))
         destination_y = int(input("Y:"))
         destination_z = int(input("Z:"))
         simple_move_start = time()
         simple_move(destination_x, destination_y, destination_z)
-        Logger.info(f"time cost {time()-simple_move_start}")
-    elif arguments.mode == 2:
-        scan(arguments.photo, arguments.locations, flag=False)
-        # take_photo(arguments.photo)
-    elif arguments.mode == 3:
-        take_photo()
+        log.info(f"time cost {time()-simple_move_start}")
+    elif args.mode == 2:
+        scan(args.photo_dir)
+        download_images(args.photo_dir)
     else:
-        Logger.error("Wrong mode number {arguments.mode}")
+        log.error("Wrong mode number {arguments.mode}")
diff --git a/src/farmbot_yolo/try.py b/src/farmbot_yolo/try.py
deleted file mode 100644
index 9f42d8e..0000000
--- a/src/farmbot_yolo/try.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from os import listdir, remove
-from os.path import join
-from pathlib import Path
-
-def remove_temp(path: Path)-> None:
-    # list file
-    for filename in listdir(path):
-        file =Path(join(path, filename))
-        if file.is_file():
-            remove(file)
-    return
-
-path = '../img'
-remove_temp(path)
\ No newline at end of file
diff --git a/static/distance.txt b/static/distance.txt
index 79999e8..ec6f0de 100644
--- a/static/distance.txt
+++ b/static/distance.txt
@@ -1,6 +1,6 @@
 camera's distance to the encoder
-0
-0
+-30
+-130
 gripper's distance to the encoder
+45
 0
-0
\ No newline at end of file
-- 
GitLab