chore(deps): update machine-learning (#6302)

* chore(deps): update machine-learning

* fix typing, use new lifespan syntax

* wrap in try / finally

* move log

---------

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
This commit is contained in:
renovate[bot]
2024-01-13 05:00:09 +00:00
committed by GitHub
parent bd5ae9f31e
commit 20be42cec0
12 changed files with 238 additions and 212 deletions

View File

@@ -5,10 +5,10 @@ from unittest import mock
import numpy as np
import pytest
from fastapi.testclient import TestClient
from numpy.typing import NDArray
from PIL import Image
from .main import app
from .schemas import ndarray_f32
@pytest.fixture
@@ -17,7 +17,7 @@ def pil_image() -> Image.Image:
@pytest.fixture
def cv_image(pil_image: Image.Image) -> ndarray_f32:
def cv_image(pil_image: Image.Image) -> NDArray[np.float32]:
return np.asarray(pil_image)[:, :, ::-1] # PIL uses RGB while cv2 uses BGR

View File

@@ -2,11 +2,11 @@ import asyncio
import gc
import os
import signal
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Iterator
from contextlib import asynccontextmanager
from typing import Any, AsyncGenerator, Iterator
from zipfile import BadZipFile
import orjson
@@ -26,7 +26,6 @@ from .schemas import (
)
MultiPartParser.max_file_size = 2**26 # spools to disk if payload is 64 MiB or larger
app = FastAPI()
model_cache = ModelCache(ttl=settings.model_ttl, revalidate=settings.model_ttl > 0)
thread_pool: ThreadPoolExecutor | None = None
@@ -35,8 +34,8 @@ active_requests = 0
last_called: float | None = None
@app.on_event("startup")
def startup() -> None:
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
global thread_pool
log.info(
(
@@ -44,21 +43,22 @@ def startup() -> None:
f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
)
)
# asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
asyncio.ensure_future(idle_shutdown_task())
log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
@app.on_event("shutdown")
def shutdown() -> None:
log.handlers.clear()
for model in model_cache.cache._cache.values():
del model
if thread_pool is not None:
thread_pool.shutdown()
gc.collect()
try:
if settings.request_threads > 0:
# asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
asyncio.ensure_future(idle_shutdown_task())
yield
finally:
log.handlers.clear()
for model in model_cache.cache._cache.values():
del model
if thread_pool is not None:
thread_pool.shutdown()
gc.collect()
def update_state() -> Iterator[None]:
@@ -71,6 +71,9 @@ def update_state() -> Iterator[None]:
active_requests -= 1
app = FastAPI(lifespan=lifespan)
@app.get("/", response_model=MessageResponse)
async def root() -> dict[str, str]:
return {"message": "Immich ML"}

View File

@@ -3,10 +3,10 @@ from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
from numpy import ascontiguousarray
import numpy as np
from numpy.typing import NDArray
from ann.ann import Ann
from app.schemas import ndarray_f32, ndarray_i32
from ..config import log, settings
@@ -56,10 +56,10 @@ class AnnSession:
def run(
self,
output_names: list[str] | None,
input_feed: dict[str, ndarray_f32] | dict[str, ndarray_i32],
input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
run_options: Any = None,
) -> list[ndarray_f32]:
inputs: list[ndarray_f32] = [ascontiguousarray(v) for v in input_feed.values()]
) -> list[NDArray[np.float32]]:
inputs: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
return self.ann.execute(self.model, inputs)

View File

@@ -6,12 +6,13 @@ from pathlib import Path
from typing import Any, Literal
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from tokenizers import Encoding, Tokenizer
from app.config import clean_name, log
from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
from app.schemas import ModelType, ndarray_f32, ndarray_i32
from app.schemas import ModelType
from .base import InferenceModel
@@ -40,7 +41,7 @@ class BaseCLIPEncoder(InferenceModel):
self.vision_model = self._make_session(self.visual_path)
log.debug(f"Loaded clip vision model '{self.model_name}'")
def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
def _predict(self, image_or_text: Image.Image | str) -> NDArray[np.float32]:
if isinstance(image_or_text, bytes):
image_or_text = Image.open(BytesIO(image_or_text))
@@ -48,7 +49,7 @@ class BaseCLIPEncoder(InferenceModel):
case Image.Image():
if self.mode == "text":
raise TypeError("Cannot encode image as text-only model")
outputs: ndarray_f32 = self.vision_model.run(None, self.transform(image_or_text))[0][0]
outputs: NDArray[np.float32] = self.vision_model.run(None, self.transform(image_or_text))[0][0]
case str():
if self.mode == "vision":
raise TypeError("Cannot encode text as vision-only model")
@@ -59,11 +60,11 @@ class BaseCLIPEncoder(InferenceModel):
return outputs
@abstractmethod
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
pass
@abstractmethod
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
pass
@property
@@ -161,11 +162,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
self.tokenizer.enable_truncation(max_length=context_length)
log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
tokens: Encoding = self.tokenizer.encode(text)
return {"text": np.array([tokens.ids], dtype=np.int32)}
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
image = resize(image, self.size)
image = crop(image, self.size)
image_np = to_numpy(image)
@@ -174,7 +175,7 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
class MCLIPEncoder(OpenCLIPEncoder):
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
def tokenize(self, text: str) -> dict[str, NDArray[np.int32]]:
tokens: Encoding = self.tokenizer.encode(text)
return {
"input_ids": np.array([tokens.ids], dtype=np.int32),

View File

@@ -5,9 +5,10 @@ import cv2
import numpy as np
from insightface.model_zoo import ArcFaceONNX, RetinaFace
from insightface.utils.face_align import norm_crop
from numpy.typing import NDArray
from app.config import clean_name
from app.schemas import BoundingBox, Face, ModelType, ndarray_f32
from app.schemas import Face, ModelType, is_ndarray
from .base import InferenceModel
@@ -36,22 +37,25 @@ class FaceRecognizer(InferenceModel):
)
self.rec_model.prepare(ctx_id=0)
def _predict(self, image: ndarray_f32 | bytes) -> list[Face]:
def _predict(self, image: NDArray[np.uint8] | bytes) -> list[Face]:
if isinstance(image, bytes):
image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
bboxes, kpss = self.det_model.detect(image)
decoded_image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
else:
decoded_image = image
assert is_ndarray(decoded_image, np.uint8)
bboxes, kpss = self.det_model.detect(decoded_image)
if bboxes.size == 0:
return []
assert isinstance(image, np.ndarray) and isinstance(kpss, np.ndarray)
assert is_ndarray(kpss, np.float32)
scores = bboxes[:, 4].tolist()
bboxes = bboxes[:, :4].round().tolist()
results = []
height, width, _ = image.shape
height, width, _ = decoded_image.shape
for (x1, y1, x2, y2), score, kps in zip(bboxes, scores, kpss):
cropped_img = norm_crop(image, kps)
embedding: ndarray_f32 = self.rec_model.get_feat(cropped_img)[0]
cropped_img = norm_crop(decoded_image, kps)
embedding: NDArray[np.float32] = self.rec_model.get_feat(cropped_img)[0]
face: Face = {
"imageWidth": width,
"imageHeight": height,

View File

@@ -1,8 +1,7 @@
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from app.schemas import ndarray_f32
_PIL_RESAMPLING_METHODS = {resampling.name.lower(): resampling for resampling in Image.Resampling}
@@ -23,11 +22,13 @@ def crop(img: Image.Image, size: int) -> Image.Image:
return img.crop((left, upper, right, lower))
def to_numpy(img: Image.Image) -> ndarray_f32:
def to_numpy(img: Image.Image) -> NDArray[np.float32]:
return np.asarray(img.convert("RGB")).astype(np.float32) / 255.0
def normalize(img: ndarray_f32, mean: float | ndarray_f32, std: float | ndarray_f32) -> ndarray_f32:
def normalize(
img: NDArray[np.float32], mean: float | NDArray[np.float32], std: float | NDArray[np.float32]
) -> NDArray[np.float32]:
return (img - mean) / std

View File

@@ -1,13 +1,10 @@
from enum import StrEnum
from typing import Any, Protocol, TypeAlias, TypedDict, TypeGuard
from typing import Any, Protocol, TypedDict, TypeGuard
import numpy as np
import numpy.typing as npt
from pydantic import BaseModel
ndarray_f32: TypeAlias = np.ndarray[int, np.dtype[np.float32]]
ndarray_i64: TypeAlias = np.ndarray[int, np.dtype[np.int64]]
ndarray_i32: TypeAlias = np.ndarray[int, np.dtype[np.int32]]
class TextResponse(BaseModel):
__root__: str
@@ -35,7 +32,7 @@ class HasProfiling(Protocol):
class Face(TypedDict):
boundingBox: BoundingBox
embedding: ndarray_f32
embedding: npt.NDArray[np.float32]
imageWidth: int
imageHeight: int
score: float
@@ -43,3 +40,7 @@ class Face(TypedDict):
def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
return hasattr(obj, "profiling") and isinstance(obj.profiling, dict)
def is_ndarray(obj: Any, dtype: "type[np._DTypeScalar_co]") -> "TypeGuard[npt.NDArray[np._DTypeScalar_co]]":
return isinstance(obj, np.ndarray) and obj.dtype == dtype