capstone-2022-45 created by GitHub Classroom.
ํ์ด์ดํญ์ค,ํฌ๋กฌ๋ฑ์ ์ปจํธ๋กค ํ ์ ์๋ Selenium์ ์ด์ฉํ์ฌ ํ์ํ ํ์ต๋ฐ์ดํฐ
์ฌ์ฉํ YOLO V4๋ชจ๋ธ์ You Only Look Once์ ์ฝ์๋ก์จ, ๊ฐ์ฒด ํ์ง(Object detection)๋ถ์ผ์์ ๋ง์ด ์๋ ค์ ธ ์์ผ๋ฉฐ ์ด๋ฏธ์ง๋ฅผ ํ ๋ฒ ๋ณด๋ ๊ฒ์ผ๋ก ๋ฌผ์ฒด์ ์ข ๋ฅ์ ์์น๋ฅผ ์ถ์ธกํ๋ฉฐ ์ด๋ฏธ์ง์ ์ ์ฒด ๋งฅ๋ฝ์ ์ดํดํ๋ฏ๋ก ๋น ๋ฅด๊ณ ์ ํํ๋ค..
๊ดํ ๋ฌธ์ ์ธ์(Optical character recognition; OCR)์ ์ฌ๋์ด ์ฐ๊ฑฐ๋ ๊ธฐ๊ณ๋ก ์ธ์ํ ๋ฌธ์์ ์์์ ์ด๋ฏธ์ง ์ค์บ๋๋ก ํ๋ํ์ฌ ๊ธฐ๊ณ๊ฐ ์ฝ์ ์ ์๋ ๋ฌธ์๋ก ๋ณํํ๋ ๊ฒ
- ํ์๋ํธ(Tesseract):
-
๋ค์ํ ์ด์ ์ฒด์ ๋ฅผ ์ํ ๊ดํ ๋ฌธ์ ์ธ์ ์์ง ์ด ์ํํธ์จ์ด๋ Apache License, ๋ฒ์ 2.0 ์ ๋ฐ๋ผ ๋ฐฐํฌ๋๋ ๋ฌด๋ฃ ์ํํธ์จ์ด์ด๋ฉฐ 2006๋ ๋ถํฐ Google์์ ๊ฐ๋ฐ์ ํ์
-
2006๋ ํ ์๋ํธ๋ ๋น์ ๊ฐ์ฅ ์ ํํ ์คํ ์์ค OCR ์์ง ์ค ํ๋๋ก ๊ฐ์ฃผ๋์๋ค.
-
(์ฝ๋ฉ์์ ์งํํ๋ ์ด์ )
#์ฃผ์์ : Colab ๋ฌด๋ฃ๋ฒ์ ์ ์ต๋ ๋ฐํ์ ์๊ฐ์ 12์๊ฐ์ด๋ฏ๋ก ๊ตฌ๊ธ ๋๋ผ์ด๋ธ๋ฅผ ํตํ ๋ฐ์ดํฐ ๋ฐฑ์ ํ์
Crawling
...
|โโ Crawling.py
|โโ README.md
|โโ chromedriver
|โโ requirements.txt
...
YOLO
...
|โโ cfg/
| โโโcustom-test-yolo.cfg
|โโ data/
| |โโ labels/
| |โโ dog.jpg
| โโโ fruit10.jpg
|โโ images/
| โโโ test.jpg
|โโ weights/
| โโโ chart_custom-train-yolo.png
|โโ YOLOv3.py
|โโ classes.txt
|โโ requirements.txt
...
OCR
...
|โโ test_img/
| โโโtest.jpeg
|โโ OCR.py
|โโ requirements.txt
...
YOLO
- YOLOํ ์คํธ๋ฅผ ์ํ ํ์ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ค์น
pip install opencv-python
pip install numpy
https://drive.google.com/file/d/1-F-C5ImQSp12bAmm8VaugJwn6TGeyIgy/view?usp=sharing
<ํด๋น ๋ค์ด๋ก๋ ํ์ผ์ YOLO/weights/ ๊ฒฝ๋ก์ ๋ฃ์ด์ฃผ์ธ์>
OCR
- OCR์ ์ํ ํ์ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ค์น
sudo apt install tesseract-ocr
sudo apt-get install tesseract-ocr-kor
pip install opencv-python
pip install pytesseract
YOLO
>- Yolo๋ชจ๋ธ์ ์ฌ์ฉํ๊ธฐ ์ํ ์ฝ๋
# Download the trained YOLO weights from Google Drive; skipped when the
# file already exists locally.
if not os.path.exists('Capstone.weights'):
    url = 'https://drive.google.com/uc?id=1-F-C5ImQSp12bAmm8VaugJwn6TGeyIgy&export=download'
    gdown.download(url, 'Capstone.weights', quiet = False)

min_confidence = 0.1  # minimum class score for a detection to be kept
width = 800           # target display width; height is derived from aspect ratio
height = 0            # filled in once the image is loaded
show_ratio = 1.0
Weights = 'Capstone.weights'           # trained weights file
file_name = "images/test.jpg"          # test image
test_cfg = "cfg/custom-test-yolo.cfg"  # YOLO config file

net = cv2.dnn.readNetFromDarknet(test_cfg, Weights)

# Class names in the order the network was trained on (Korean labels,
# reproduced byte-for-byte from the original).
classes = ["๋ฌธ์ด","์์ก์ด๋ฒ์ฏ","๋ธ๋ฃจ๋ฒ ๋ฆฌ","๋ฐฉ์ธํ ๋งํ ","๋ฌด", "๋ฐฐ", "์ฝฉ๋๋ฌผ"
,"๊ฝ๊ฒ","์๋ฐฐ์ถ", "์ํ", "์์ฐ", "์๊ธ์น", "๊นป์", "์ ํธ๋ฐ", "๋ฐฅ", "์ฅ์์"
,"๋ง๋", "๋ฐ์ง๋ฝ", "๊ฐ์", "์๋ฐ", "๋ธ๋ก์ฝ๋ฆฌ", "์ค์ด", "๋ฉ๋ก ", "ํ", "์ค์ง์ด"
,"๋น๊ทผ", "๋ณต์ญ์", "์์ถ","๊ณ๋", "ํํ๋ฆฌ์นด", "์ฌ๊ณผ", "๊ณ ์ถ", "๋ผ์ง๊ณ ๊ธฐ", "์ฐธ์ธ"
,"๋ฉธ์น", "๊ณ ๋ฑ์ด", "์กฐ๊ธฐ", "๋ฐฐ์ถ", "๊ฐ", "๋ธ๊ธฐ", "๊ฐ์ง", "์๊ณ ๊ธฐ", "๊ณ ๊ตฌ๋ง"
,"๋ฒํฐ", "๊ทค", "๋ญ๊ณ ๊ธฐ", "๋๋ถ" ,"์์ก์ด๋ฒ์ฏ", "ํค์", "๊ฐ์น"]
# NOTE(review): class_count is unused and does not match len(classes) (47
# entries here) — confirm against the training classes.txt before relying on it.
class_count = 50

color_lists = np.random.uniform(0, 255, size= (len(classes), 3))  # one colour per class
layer_names = net.getLayerNames()  # NOTE(review): unused; output layers are hard-coded below
output_layers = ['yolo_139', 'yolo_150', 'yolo_161']

img = cv2.imread(file_name)
h, w = img.shape[:2]
height = int(h * width / w)  # preserve the image aspect ratio at the display width

# 0.00392 ≈ 1/255 pixel scaling; 608x608 is the network input size
# (must match the cfg file); swapRB converts BGR -> RGB.
blob = cv2.dnn.blobFromImage(img, 0.00392, (608,608), swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)

confidences = []
names = []
boxes = []
colors = []
for out in outs:
    for detection in out:
        # detection = [cx, cy, w, h, objectness, score_0, ..., score_N]
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > min_confidence:
            # Box centre/size are normalised; scale to display dimensions.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Convert centre-based box to top-left corner coordinates.
            x = int(center_x - w /2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            names.append(classes[class_id])
            colors.append(color_lists[class_id])

# Non-maximum suppression removes overlapping duplicate boxes.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)
print(set(names))
OCR
>- ๊ตฌ๊ธ์์ ์ ๊ณตํด์ฃผ๋ ๊ดํ๋ฌธ์์ต์ tesseract-ocr์ ์ด์ฉ
- kor.traineddata : ํ๊ธ ๋ฐ์ดํฐ๊ฐ ์๋ ํ์ผ
OpenCV๋ฅผ ์ด์ฉํ์ฌ ์๊ณก ์ด๋ฏธ์ง๋ฅผ ์๊ทผ๋ณํ ์ดํ pytesseract๋ฅผ ์ด์ฉํ์ฌ ์์์ฆ์์ ํด๋น ํ ์คํธ ๊ฒ์ถ
from cv2 import INTER_AREA, INTER_LINEAR
import pytesseract
import numpy as np
import cv2
# Ingredient names (Korean) that the OCR post-processing searches for as
# substrings of each recognised token.
# NOTE(review): the same mushroom name appears to be listed twice (rows 3
# and 4 of this literal) — a duplicate makes the matching loop report it
# twice per hit; confirm against the training labels and deduplicate.
classes = ["๊ฐ์ง","๊ฐ์", "๊นป์", "๋ฒํฐ", "๋น๊ทผ",
"๋ํ","๋ง๋", "๋ฌด","๋ฐฐ์ถ","๋ธ๋ก์ฝ๋ฆฌ",
"์์ถ","์์ก์ด๋ฒ์ฏ","์๊ธ์น","์ ํธ๋ฐ",
"์๋ฐฐ์ถ", "์์ก์ด๋ฒ์ฏ","์ํ","์ค์ด",
"๊ณ ์ถ","๊ณ ๊ตฌ๋ง", "์ฝฉ๋๋ฌผ", "๊ทค","๊ฐ",
"๋ธ๊ธฐ", "๋ฉ๋ก ", "์ฐธ์ธ", "๋ฐฐ", "๋ณต์ญ์",
"๋ธ๋ฃจ๋ฒ ๋ฆฌ", "์ฌ๊ณผ", "์๋ฐ", "ํํ๋ฆฌ์นด",
"ํค์","๋ฐฉ์ธํ ๋งํ ", "์๊ณ ๊ธฐ","๋ผ์ง๊ณ ๊ธฐ",
"๋ญ๊ณ ๊ธฐ", "๋ฌ๊ฑ", "์กฐ๊ธฐ", "๊ฐ์น","๊ณ ๋ฑ์ด",
"๋ฌธ์ด", "๊ฝ๊ฒ", "์์ฐ", "์ค์ง์ด","๋ฐ์ง๋ฝ",
"๋ฉธ์น", "๋๋ถ", "์ฅ์์","๋ฐฅ"]
def order_points(pts):
    ## Arrange four corner points as [top-left, top-right, bottom-right,
    ## bottom-left] using coordinate sums and differences.
    ordered = np.zeros((4, 2), dtype="float32")
    coord_sum = pts.sum(axis=1)       # x + y per point
    coord_diff = np.diff(pts, axis=1) # y - x per point
    ordered[0] = pts[np.argmin(coord_sum)]   # top-left: smallest x + y
    ordered[2] = pts[np.argmax(coord_sum)]   # bottom-right: largest x + y
    ordered[1] = pts[np.argmin(coord_diff)]  # top-right: smallest y - x
    ordered[3] = pts[np.argmax(coord_diff)]  # bottom-left: largest y - x
    return ordered
def four_point_transform(image, pts):
    ## Perspective-warp the quadrilateral `pts` in `image` into a flat,
    ## front-facing rectangle sized by the quad's longer opposite edges.
    corners = order_points(pts)
    (tl, tr, br, bl) = corners

    # Output width: longer of the bottom and top edges.
    bottom_w = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    top_w = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    out_w = max(int(bottom_w), int(top_w))

    # Output height: longer of the right and left edges.
    right_h = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    left_h = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    out_h = max(int(right_h), int(left_h))

    # Destination corners in the same tl, tr, br, bl order.
    dst = np.array(
        [[0, 0],
         [out_w - 1, 0],
         [out_w - 1, out_h - 1],
         [0, out_h - 1]], dtype="float32")

    transform = cv2.getPerspectiveTransform(corners, dst)
    return cv2.warpPerspective(image, transform, (out_w, out_h))
####### Read the input image and find the receipt outline for de-skewing.
# NOTE(review): the repo tree ships test_img/test.jpeg, but test2.jpg is read
# here — confirm which file is intended.
img = cv2.imread('test_img/test2.jpg')
# Resize to a fixed 600px height, keeping the aspect ratio.
ratio = 600.0/img.shape[0]
dim = (int(img.shape[1] * ratio), 600)
img = cv2.resize(img, dim, interpolation= cv2.INTER_AREA)
og_img = img.copy()  # unmodified copy used later for the perspective warp
# Grayscale + blur + Canny to get clean edges for contour detection.
GRAY = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
GRAY = cv2.GaussianBlur(GRAY, (3,3), 0)
edged = cv2.Canny(GRAY, 70,200)
cnts, _ = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(cnts, key =cv2.contourArea, reverse= True)[:5]
## keep only the 5 largest contours by area
check = False
for c in cnts:
    ## examine candidates from largest to smallest
    peri = cv2.arcLength(c, True)
    ## perimeter of the contour
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    ## approximate the contour as a polygon with 2% tolerance
    if len(approx) == 4 and cv2.contourArea(c)>=20000:
        ## a large 4-corner approximation is taken to be the document outline
        screenCnt = approx
        check = True
        break
if check == False:
    # No document outline found: fall back to OCR on the whole (gray) frame.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # cv2.imshow("IMG", img)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()
else :
    # Outline found: draw it, warp the original to a flat view, grayscale it.
    cv2.drawContours(img, [screenCnt], -1, (0,255,0), 2)
    warped = four_point_transform(og_img, screenCnt.reshape(4, 2))
    copy = warped.copy()
    img = cv2.cvtColor(copy, cv2.COLOR_BGR2GRAY)
#### TEST ####
# cv2.imshow("IMG", img)
# cv2.imshow("warped", copy)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# Either branch above leaves `img` grayscale, so this copy is single-channel.
GRAY = img.copy()
h,w = GRAY.shape
# Upscale 2x and denoise to help Tesseract (h=10 is the filter strength,
# unrelated to the image height variable above).
GRAY = cv2.resize(GRAY, (2*w, 2*h), interpolation= INTER_LINEAR)
GRAY = cv2.fastNlMeansDenoising(GRAY,h=10, searchWindowSize=21,templateWindowSize=7)
min_confidence = 0.6  # NOTE(review): never used in this snippet — confirm before removing

# Run Tesseract OCR with the Korean language pack on the prepared image.
results = pytesseract.image_to_string(GRAY, lang="kor")

# Keep only letters and line breaks: digits, punctuation and spaces are all
# dropped, so each surviving token is the alphabetic content of one OCR line.
# (The accumulator is renamed from `list`, which shadowed the built-in.)
chars = [ch for ch in results if ch.isalpha() or ch == "\n"]
result = "".join(chars).replace("\n", " ").split(" ")

# Discard empty tokens left behind by consecutive separators.
recipe = [token for token in result if token != '']

# Print and collect every known ingredient name that appears inside a token.
out = []
for token in recipe:
    for ingredient in classes:
        if ingredient in token:
            print("์ธ์๋ ์ฌ๋ฃ๋ : ", ingredient)
            out.append(ingredient)
print(set(out))