xorbitsai · qinxuye · Sep 13, 2024 · Sep 12, 2024
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
@@ -63,7 +63,7 @@
     CreateCompletion,
     ImageList,
     PeftModelConfig,
-    SDAPITxt2imgResult,
+    SDAPIResult,
     VideoList,
     max_tokens_field,
 )
@@ -138,6 +138,24 @@ class SDAPITxt2imgRequst(BaseModel):
     width: Optional[int] = 512
     height: Optional[int] = 512
     sampler_name: Optional[str] = None
+    denoising_strength: Optional[float] = None
+    kwargs: Optional[str] = None
+    user: Optional[str] = None
+
+
+class SDAPIImg2imgRequst(BaseModel):  # request body of POST /sdapi/v1/img2img
+    model: Optional[str]  # model uid; override_settings["sd_model_checkpoint"] is the fallback
+    init_images: Optional[list]  # "data:image/<fmt>;base64,<payload>" strings
+    prompt: Optional[str] = ""
+    negative_prompt: Optional[str] = ""
+    steps: Optional[int] = None  # forwarded as num_inference_steps
+    seed: Optional[int] = -1
+    cfg_scale: Optional[float] = 7.0  # forwarded as guidance_scale
+    override_settings: Optional[dict] = {}  # read for sd_model_checkpoint and clip_skip
+    width: Optional[int] = 512  # width and height are merged into size="<width>*<height>"
+    height: Optional[int] = 512
+    sampler_name: Optional[str] = None
+    denoising_strength: Optional[float] = None  # forwarded as strength
     kwargs: Optional[str] = None
     user: Optional[str] = None
 
@@ -574,7 +592,18 @@ async def internal_exception_handler(request: Request, exc: Exception):
             "/sdapi/v1/txt2img",
             self.sdapi_txt2img,
             methods=["POST"],
-            response_model=SDAPITxt2imgResult,
+            response_model=SDAPIResult,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
+        self._router.add_api_route(
+            "/sdapi/v1/img2img",
+            self.sdapi_img2img,
+            methods=["POST"],
+            response_model=SDAPIResult,
             dependencies=(
                 [Security(self._auth_service, scopes=["models:read"])]
                 if self.is_authenticated()
@@ -1569,6 +1598,40 @@ async def sdapi_txt2img(self, request: Request) -> Response:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def sdapi_img2img(self, request: Request) -> Response:
+        """SD API img2img endpoint: resolve the model, then forward the body to it."""
+        body = SDAPIImg2imgRequst.parse_obj(await request.json())
+        # override_settings may be an explicit JSON null, so guard before calling .get().
+        settings = body.override_settings or {}
+        model_uid = body.model or settings.get("sd_model_checkpoint")
+        try:
+            if not model_uid:
+                raise ValueError("Unknown model")
+            model = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            kwargs = dict(body)  # all request fields; body.kwargs (JSON) overrides them below
+            kwargs.update(json.loads(body.kwargs) if body.kwargs else {})
+            image_list = await model.img2img(**kwargs)
+            return Response(content=image_list, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            self.handle_request_limit_error(re)
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_variations(
         self,
         model: str = Form(...),

diff --git a/xinference/core/model.py b/xinference/core/model.py
@@ -793,6 +793,20 @@ async def image_to_image(
             f"Model {self._model.model_spec} is not for creating image."
         )
 
+    @request_limit
+    @log_async(logger=logger)
+    async def img2img(self, **kwargs):
+        """Forward an SD API img2img call to the wrapped model.
+
+        ``request_id`` is dropped from ``kwargs`` first, then the model's ``img2img``
+        is invoked through ``_call_wrapper_json`` with the remaining keyword arguments.
+        Raises AttributeError when the wrapped model has no ``img2img`` attribute.
+        """
+        kwargs.pop("request_id", None)
+        if not hasattr(self._model, "img2img"):
+            raise AttributeError(f"Model {self._model.model_spec} is not for img2img.")
+        return await self._call_wrapper_json(self._model.img2img, **kwargs)
+
     @log_async(
         logger=logger,
         ignore_kwargs=["image"],

diff --git a/xinference/model/image/sdapi.py b/xinference/model/image/sdapi.py
@@ -11,30 +11,48 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import base64
+import io
 import warnings
 
+from PIL import Image
+
 
 class SDAPIToDiffusersConverter:
-    txt2img_identical_args = [
+    txt2img_identical_args = {
         "prompt",
         "negative_prompt",
         "seed",
         "width",
         "height",
         "sampler_name",
-    ]
+    }
     txt2img_arg_mapping = {
         "steps": "num_inference_steps",
         "cfg_scale": "guidance_scale",
+        "denoising_strength": "strength",
+    }
+    img2img_identical_args = {
+        "prompt",
+        "negative_prompt",
+        "seed",
+        "width",
+        "height",
+        "sampler_name",
+    }
+    img2img_arg_mapping = {
+        "init_images": "image",
+        "steps": "num_inference_steps",
+        "cfg_scale": "guidance_scale",
+        "denoising_strength": "strength",
     }
 
     @staticmethod
-    def convert_txt2img_to_diffusers(params: dict) -> dict:
+    def convert_to_diffusers(sd_type: str, params: dict) -> dict:
         diffusers_params = {}
 
-        identical_args = set(SDAPIToDiffusersConverter.txt2img_identical_args)
-        mapping_args = SDAPIToDiffusersConverter.txt2img_arg_mapping
+        identical_args = getattr(SDAPIToDiffusersConverter, f"{sd_type}_identical_args")
+        mapping_args = getattr(SDAPIToDiffusersConverter, f"{sd_type}_arg_mapping")
         for param, value in params.items():
             if param in identical_args:
                 diffusers_params[param] = value
@@ -45,13 +63,17 @@ def convert_txt2img_to_diffusers(params: dict) -> dict:
 
         return diffusers_params
 
+    @staticmethod
+    def get_available_args(sd_type: str) -> set:
+        names = set(getattr(SDAPIToDiffusersConverter, f"{sd_type}_identical_args"))
+        names.update(getattr(SDAPIToDiffusersConverter, f"{sd_type}_arg_mapping"))
+        return names  # pass-through names plus the SD API names that get renamed
+
 
 class SDAPIDiffusionModelMixin:
-    def txt2img(self, **kwargs):
-        available_args = set(
-            SDAPIToDiffusersConverter.txt2img_identical_args
-            + list(SDAPIToDiffusersConverter.txt2img_arg_mapping)
-        )
+    @staticmethod
+    def _check_kwargs(sd_type: str, kwargs: dict):
+        available_args = SDAPIToDiffusersConverter.get_available_args(sd_type)
         unknown_args = []
         available_kwargs = {}
         for arg, value in kwargs.items():
@@ -64,14 +86,20 @@ def txt2img(self, **kwargs):
                 f"Some args are not supported for now and will be ignored: {unknown_args}"
             )
 
-        converted_kwargs = SDAPIToDiffusersConverter.convert_txt2img_to_diffusers(
-            available_kwargs
+        converted_kwargs = SDAPIToDiffusersConverter.convert_to_diffusers(
+            sd_type, available_kwargs
         )
+
         width, height = converted_kwargs.pop("width", None), converted_kwargs.pop(
             "height", None
         )
         if width and height:
             converted_kwargs["size"] = f"{width}*{height}"
+
+        return converted_kwargs
+
+    def txt2img(self, **kwargs):
+        converted_kwargs = self._check_kwargs("txt2img", kwargs)
         result = self.text_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
 
         # convert to SD API result
@@ -80,3 +108,29 @@ def txt2img(self, **kwargs):
             "info": {"created": result["created"]},
             "parameters": {},
         }
+
+    @staticmethod
+    def _decode_b64_img(img_str: str) -> Image.Image:
+        """Decode a ``data:image/<fmt>;base64,<payload>`` string into a PIL image."""
+        header, _, payload = img_str.partition(",")
+        mime, _, encoding = header.partition(";")
+        fmt = mime.partition("/")[2]
+        if encoding != "base64" or not fmt or not payload:  # explicit check, not an assert
+            raise ValueError("init image must look like 'data:image/<fmt>;base64,<data>'")
+        return Image.open(io.BytesIO(base64.b64decode(payload)), formats=[fmt])
+
+    def img2img(self, **kwargs):
+        """Run SD API img2img on top of ``image_to_image`` and return an SD API result."""
+        # Both keys can be present but None (an explicit JSON null), hence the ``or``.
+        init_images = kwargs.pop("init_images", None) or []
+        kwargs["init_images"] = [self._decode_b64_img(i) for i in init_images]
+        clip_skip = (kwargs.get("override_settings") or {}).get("clip_skip")
+        converted_kwargs = self._check_kwargs("img2img", kwargs)
+        if clip_skip:
+            converted_kwargs["clip_skip"] = clip_skip
+        result = self.image_to_image(response_format="b64_json", **converted_kwargs)  # type: ignore
+        return {
+            "images": [r["b64_json"] for r in result["data"]],
+            "info": {"created": result["created"]},
+            "parameters": {},
+        }
diff --git a/xinference/types.py b/xinference/types.py
@@ -47,7 +47,7 @@ class ImageList(TypedDict):
     data: List[Image]
 
 
-class SDAPITxt2imgResult(TypedDict):
+class SDAPIResult(TypedDict):  # response model of the SD API txt2img and img2img routes
     images: List[str]
     parameters: dict
     info: dict