Skip to content

Commit

Permalink
Merge pull request #121 from VikParuchuri/dev
Browse files Browse the repository at this point in the history
Fix rotate and copy bugs
  • Loading branch information
VikParuchuri committed May 28, 2024
2 parents 53135d0 + 4485273 commit c5f5e77
Show file tree
Hide file tree
Showing 7 changed files with 272 additions and 261 deletions.
520 changes: 263 additions & 257 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "surya-ocr"
- version = "0.4.11"
+ version = "0.4.12"
description = "OCR, layout, reading order, and line detection in 90+ languages"
authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
readme = "README.md"
Expand Down
2 changes: 2 additions & 0 deletions surya/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def batch_detection(images: List, model: SegformerForRegressionMask, processor,
batch_size = get_batch_size()
heatmap_count = model.config.num_labels

+ images = [image.convert("RGB") for image in images] # also copies the images

orig_sizes = [image.size for image in images]
splits_per_image = [get_total_splits(size, processor) for size in orig_sizes]

Expand Down
1 change: 1 addition & 0 deletions surya/model/recognition/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def numpy_resize(cls, image: np.ndarray, size, interpolation=cv2.INTER_LANCZOS4)
max_width, max_height = size["width"], size["height"]

if (height == max_height and width <= max_width) or (width == max_width and height <= max_height):
+ image = image.transpose(2, 0, 1)
return image

scale = min(max_width / width, max_height / height)
Expand Down
4 changes: 3 additions & 1 deletion surya/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@


def run_recognition(images: List[Image.Image], langs: List[List[str]], rec_model, rec_processor, bboxes: List[List[List[int]]] = None, polygons: List[List[List[List[int]]]] = None, batch_size=None) -> List[OCRResult]:
- images = convert_if_not_rgb(images)
# Polygons need to be in corner format - [[x1, y1], [x2, y2], [x3, y3], [x4, y4]], bboxes in [x1, y1, x2, y2] format
assert bboxes is not None or polygons is not None
assert len(images) == len(langs), "You need to pass in one list of languages for each image"
+
+ images = convert_if_not_rgb(images)
+
slice_map = []
all_slices = []
all_langs = []
Expand Down
2 changes: 1 addition & 1 deletion surya/ordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def batch_ordering(images: List, bboxes: List[List[List[float]]], model: OrderVi
if batch_size is None:
batch_size = get_batch_size()

- images = convert_if_not_rgb(images)
+ images = [image.convert("RGB") for image in images] # also copies the images

output_order = []
for i in tqdm(range(0, len(images), batch_size), desc="Finding reading order"):
Expand Down
2 changes: 1 addition & 1 deletion surya/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def batch_recognition(images: List, languages: List[List[str]], model, processor
for l in languages:
assert len(l) <= settings.RECOGNITION_MAX_LANGS, f"OCR only supports up to {settings.RECOGNITION_MAX_LANGS} languages per image, you passed {l}."

- images = convert_if_not_rgb(images)
+ images = [image.convert("RGB") for image in images] # also copies the images
if batch_size is None:
batch_size = get_batch_size()

Expand Down

0 comments on commit c5f5e77

Please sign in to comment.