diff --git a/README.md b/README.md index dc05d23..832f4dd 100644 --- a/README.md +++ b/README.md @@ -85,13 +85,15 @@ You can choose between two backends for the GPT agent: Image to 3D ### ☁️ Service -Run the image-to-3D generation service locally. +Run the image-to-3D generation service locally, or use the Hunyuan3D API instead. Models downloaded automatically on first run, please be patient. ```sh # Run in foreground python apps/image_to_3d.py # Or run in the background CUDA_VISIBLE_DEVICES=0 nohup python apps/image_to_3d.py > /dev/null 2>&1 & +# Or use the Hunyuan3D API +python apps/image_to_3d_api.py ``` ### ⚡ API @@ -115,12 +117,16 @@ Support the use of [SAM3D](https://github.com/facebookresearch/sam-3d-objects) o Text to 3D ### ☁️ Service -Deploy the text-to-3D generation service locally. +Deploy the text-to-3D generation service locally, or use the Hunyuan3D API instead. Text-to-image model based on the Kolors model, supporting Chinese and English prompts. Models downloaded automatically on first run, please be patient. ```sh +# Run in foreground python apps/text_to_3d.py +# Or use the Hunyuan3D API
# ---- README.md (end of the "Text to 3D" hunk) -------------------------------
# +python apps/text_to_3d_api.py
# +
#  ```
#
#  ### ⚡ API
#
# ---- apps/hunyuan_image3d_save.py (new file) ---------------------------------
# Poll a Tencent Cloud Hunyuan3D job until it finishes and download its
# result files (mesh + optional preview image).
import hashlib
import hmac
import io
import json
import os
import sys
import time
import urllib.request
import zipfile
from datetime import datetime, timezone
from http.client import HTTPSConnection
from pathlib import Path

_AI3D_HOST = "ai3d.tencentcloudapi.com"
_AI3D_SERVICE = "ai3d"
_AI3D_REGION = "ap-guangzhou"
_AI3D_VERSION = "2025-05-13"
# Socket timeout (seconds) so a stalled connection cannot hang the UI forever.
_HTTP_TIMEOUT_SECONDS = 60


def sign(key, msg):
    """Return the raw HMAC-SHA256 digest of ``msg`` (UTF-8 encoded) under ``key``."""
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()


def _store_model_bytes(
    content: bytes,
    save_dir: Path,
    ext: str,
    zip_name: str = "model.zip",
) -> str:
    """Write a downloaded payload to ``save_dir`` and return the mesh path.

    A zip payload is saved as ``zip_name``, extracted, and the first extracted
    file (sorted by path) whose suffix matches ``ext`` is returned. Any other
    payload is written verbatim to ``result.<ext>``.

    Raises:
        FileNotFoundError: the archive contains no file with the wanted suffix.
    """
    suffix = "." + ext.lower().lstrip(".")

    if not zipfile.is_zipfile(io.BytesIO(content)):
        model_path = save_dir / f"result{suffix}"
        model_path.write_bytes(content)
        return str(model_path.resolve())

    zip_path = save_dir / zip_name
    zip_path.write_bytes(content)
    with zipfile.ZipFile(zip_path, "r") as archive:
        # ``extract`` returns the sanitised on-disk path of each member, so we
        # only ever consider files that came out of *this* archive instead of
        # globbing the directory (which could pick up a stale mesh left there
        # by an earlier run that reused the same save_dir).
        extracted = [
            Path(archive.extract(member, save_dir))
            for member in archive.infolist()
        ]

    mesh_files = sorted(
        path
        for path in extracted
        if path.is_file() and path.suffix.lower() == suffix
    )
    if not mesh_files:
        raise FileNotFoundError(f"未找到 {suffix} 文件")
    return str(mesh_files[0].resolve())


def download_and_extract_obj(
    url: str,
    save_dir: str,
    ext: str,
    zip_name: str = "model.zip"
) -> str:
    """Download a model file (or a zip containing it) and return the mesh path.

    Args:
        url: Download URL of the result file.
        save_dir: Directory to write into; created if missing.
        ext: Wanted mesh extension, with or without a leading dot
            (case-insensitive), e.g. ``"glb"`` or ``".OBJ"``.
        zip_name: File name used to store the archive when the payload is a zip.

    Returns:
        Absolute path of the mesh file on disk.

    Raises:
        requests.HTTPError: the server answered with an error status.
        FileNotFoundError: a zip payload holds no file with extension ``ext``.
    """
    # Imported lazily so the signing/query helpers of this module stay
    # importable in environments where ``requests`` is not installed.
    import requests

    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    # The whole body is needed in memory for the zip sniffing below, so a
    # streamed response would buy nothing; a timeout avoids hanging forever.
    with requests.get(url, timeout=_HTTP_TIMEOUT_SECONDS) as resp:
        resp.raise_for_status()
        content = resp.content

    return _store_model_bytes(content, save_dir, ext, zip_name)


def _build_query_headers(payload, action, secret_id, secret_key, timestamp=None):
    """Build TC3-HMAC-SHA256 signed headers for one ai3d request.

    ``timestamp`` (Unix seconds) defaults to "now"; it is a parameter so that
    every request can be signed freshly and so the result is reproducible.
    """
    algorithm = "TC3-HMAC-SHA256"
    timestamp = int(time.time()) if timestamp is None else int(timestamp)
    date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")

    # Step 1: canonical request.
    canonical_headers = (
        "content-type:application/json; charset=utf-8\n"
        f"host:{_AI3D_HOST}\n"
        f"x-tc-action:{action.lower()}\n"
    )
    signed_headers = "content-type;host;x-tc-action"
    hashed_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    canonical_request = (
        "POST\n/\n\n"
        + canonical_headers
        + "\n"
        + signed_headers
        + "\n"
        + hashed_payload
    )

    # Step 2: string to sign.
    credential_scope = f"{date}/{_AI3D_SERVICE}/tc3_request"
    hashed_canonical_request = hashlib.sha256(
        canonical_request.encode("utf-8")
    ).hexdigest()
    string_to_sign = (
        f"{algorithm}\n{timestamp}\n{credential_scope}\n{hashed_canonical_request}"
    )

    # Step 3: derive the signing key and sign.
    secret_date = sign(("TC3" + secret_key).encode("utf-8"), date)
    secret_service = sign(secret_date, _AI3D_SERVICE)
    secret_signing = sign(secret_service, "tc3_request")
    signature = hmac.new(
        secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
    ).hexdigest()

    authorization = (
        f"{algorithm} "
        f"Credential={secret_id}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, "
        f"Signature={signature}"
    )
    return {
        "Authorization": authorization,
        "Content-Type": "application/json; charset=utf-8",
        "Host": _AI3D_HOST,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": _AI3D_VERSION,
        "X-TC-Region": _AI3D_REGION,
    }


def _query_job_once(payload, action, secret_id, secret_key):
    """Send one signed query and return the ``Response`` dict of the reply.

    Raises:
        RuntimeError: the reply carries a ``Response.Error`` entry.
    """
    headers = _build_query_headers(payload, action, secret_id, secret_key)
    conn = HTTPSConnection(_AI3D_HOST, timeout=_HTTP_TIMEOUT_SECONDS)
    try:
        conn.request("POST", "/", body=payload.encode("utf-8"), headers=headers)
        body = conn.getresponse().read().decode("utf-8")
    finally:
        # One connection per poll; always release the socket.
        conn.close()

    response = json.loads(body).get("Response", {})
    if "Error" in response:
        raise RuntimeError(f"Tencent API Error: {response['Error']}")
    return response


def query_and_download_hunyuan_job(
    job_id: str,
    save_dir: str,
    secret_id: str,
    secret_key: str,
    action: str = "QueryHunyuanTo3DRapidJob",
    max_wait_seconds: float = 600,
    poll_interval: float = 20,
):
    """Poll a Hunyuan3D job until it is done, then download its results.

    Args:
        job_id: JobId returned by the submit call.
        save_dir: Directory that receives the model and preview files.
        secret_id: Tencent Cloud SecretId.
        secret_key: Tencent Cloud SecretKey.
        action: Query action name sent in ``X-TC-Action``.
        max_wait_seconds: Give up after polling this long (default 10 min).
        poll_interval: Seconds to sleep between polls.

    Returns:
        ``(model_path, preview_path)`` for the first result file;
        ``preview_path`` is ``None`` when the job returned no preview image.

    Raises:
        RuntimeError: the API returned an error, the job status is ``FAIL``,
            or the finished job lists no downloadable model.
        TimeoutError: the job did not finish within ``max_wait_seconds``.
    """
    # NOTE(review): the submit helpers default to "SubmitHunyuanTo3DProJob"
    # while this default queries the *Rapid* job API — confirm the two are
    # meant to be paired, or pass the matching ``action`` explicitly.
    payload = json.dumps({"JobId": job_id}, ensure_ascii=False)

    start_time = time.monotonic()
    while True:
        # Headers are re-signed on every poll: a signature created once up
        # front would carry a stale timestamp long before the 10-minute
        # polling budget is used up.
        response = _query_job_once(payload, action, secret_id, secret_key)
        status = response.get("Status")

        print(f"[AI3D] Job status = {status}")

        if status == "DONE":
            break
        if status == "FAIL":
            # Fail fast instead of polling a dead job until the timeout.
            raise RuntimeError(
                "Hunyuan3D job failed: "
                f"{response.get('ErrorCode')} {response.get('ErrorMessage')}"
            )
        if time.monotonic() - start_time > max_wait_seconds:
            raise TimeoutError("Job polling timeout")

        time.sleep(poll_interval)

    out_dir = Path(save_dir)
    # Created here as well so a preview can be stored even if an item has no
    # model URL (the model download would otherwise be what creates it).
    out_dir.mkdir(parents=True, exist_ok=True)

    model_paths = []
    preview_paths = []
    for i, item in enumerate(response.get("ResultFile3Ds") or []):
        model_url = item.get("Url")
        preview_url = item.get("PreviewImageUrl")
        file_type = (item.get("Type") or "UNKNOWN").lower()  # obj / glb / fbx ...

        if model_url:
            model_paths.append(
                download_and_extract_obj(model_url, str(out_dir), file_type)
            )

        if preview_url:
            preview_path = out_dir / f"preview_{i}.png"
            urllib.request.urlretrieve(preview_url, preview_path)
            preview_paths.append(str(preview_path))

    if not model_paths:
        raise RuntimeError(
            f"Hunyuan3D job {job_id} finished without a downloadable model file"
        )
    return model_paths[0], (preview_paths[0] if preview_paths else None)


# ---- apps/hunyuan_image3d_submit.py (new file) -------------------------------
# Submit an image-to-3D job to the Tencent Cloud Hunyuan3D (ai3d) API.
import argparse
import base64
import hashlib
import hmac
import json
import os
import time
from datetime import datetime, timezone
from http.client import HTTPSConnection
from typing import Any, Dict, Optional, Tuple

# Socket timeout (seconds) for the submit request.
_REQUEST_TIMEOUT_SECONDS = 60


def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of ``msg`` (UTF-8 encoded) under ``key``."""
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()


def _build_tc3_headers(
    payload: str,
    action: str,
    secret_id: str,
    secret_key: str,
    service: str = "ai3d",
    host: str = "ai3d.tencentcloudapi.com",
    region: str = "ap-guangzhou",
    version: str = "2025-05-13",
    token: str = "",
    timestamp: Optional[int] = None,
) -> Dict[str, str]:
    """Build TC3-HMAC-SHA256 signed request headers.

    Args:
        payload: Exact JSON body that will be sent (it is hashed into the
            signature, so it must match the bytes on the wire).
        action: API action name, sent as ``X-TC-Action``.
        secret_id: Tencent Cloud SecretId.
        secret_key: Tencent Cloud SecretKey.
        service: Service name used in the credential scope.
        host: API host, part of the signed headers.
        region: Value of ``X-TC-Region``.
        version: Value of ``X-TC-Version``.
        token: Optional temporary-credential token (``X-TC-Token``).
        timestamp: Unix seconds to sign with; defaults to the current time.

    Returns:
        Header dict ready to pass to ``HTTPSConnection.request``.
    """
    timestamp = int(time.time()) if timestamp is None else int(timestamp)
    date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")

    canonical_headers = (
        "content-type:application/json; charset=utf-8\n"
        f"host:{host}\n"
        f"x-tc-action:{action.lower()}\n"
    )
    signed_headers = "content-type;host;x-tc-action"
    hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()

    canonical_request = (
        "POST\n/\n\n"
        f"{canonical_headers}\n"
        f"{signed_headers}\n"
        f"{hashed_request_payload}"
    )

    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = (
        "TC3-HMAC-SHA256\n"
        f"{timestamp}\n"
        f"{credential_scope}\n"
        f"{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
    )

    secret_date = _sign(("TC3" + secret_key).encode("utf-8"), date)
    secret_service = _sign(secret_date, service)
    secret_signing = _sign(secret_service, "tc3_request")
    signature = hmac.new(
        secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
    ).hexdigest()

    authorization = (
        "TC3-HMAC-SHA256 "
        f"Credential={secret_id}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, "
        f"Signature={signature}"
    )

    headers: Dict[str, str] = {
        "Authorization": authorization,
        "Content-Type": "application/json; charset=utf-8",
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }
    if token:
        headers["X-TC-Token"] = token
    return headers


def _post_ai3d(
    payload_obj: Dict[str, Any], action: str, secret_id: str, secret_key: str
) -> Dict[str, Any]:
    """POST a signed JSON request to the ai3d endpoint and return the reply.

    Raises:
        RuntimeError: the reply carries a ``Response.Error`` entry.
    """
    host = "ai3d.tencentcloudapi.com"
    payload = json.dumps(payload_obj, separators=(",", ":"), ensure_ascii=False)
    headers = _build_tc3_headers(payload, action, secret_id, secret_key, host=host)

    conn = HTTPSConnection(host, timeout=_REQUEST_TIMEOUT_SECONDS)
    try:
        conn.request("POST", "/", body=payload.encode("utf-8"), headers=headers)
        body = conn.getresponse().read().decode("utf-8")
    finally:
        conn.close()  # release the socket even when the request fails
    result = json.loads(body)

    if "Error" in result.get("Response", {}):
        raise RuntimeError(f"Tencent API Error: {result['Response']['Error']}")
    return result


def _resolve_credentials(
    secret_id: Optional[str], secret_key: Optional[str]
) -> Tuple[str, str]:
    """Return cleaned credentials, falling back to environment variables.

    Surrounding whitespace and quotes are stripped because values copied
    from shell ``export`` lines or ``.env`` files often still carry them.

    Raises:
        ValueError: either credential is missing after the fallbacks.
    """
    secret_id = (
        secret_id
        or os.getenv("TENCENT_SECRET_ID")
        or os.getenv("TENCENTCLOUD_SECRET_ID")
    )
    secret_key = (
        secret_key
        or os.getenv("TENCENT_SECRET_KEY")
        or os.getenv("TENCENTCLOUD_SECRET_KEY")
    )
    if secret_id:
        secret_id = secret_id.strip().strip("'\"")
    if secret_key:
        secret_key = secret_key.strip().strip("'\"")
    if not secret_id or not secret_key:
        raise ValueError("Missing credential: set secret_id/secret_key or env TENCENT_SECRET_ID/TENCENT_SECRET_KEY")
    return secret_id, secret_key


def hunyuan_image3d_submit(
    image_path: str,
    secret_id: Optional[str] = None,
    secret_key: Optional[str] = None,
    result_format: str = "GLB",
    enable_pbr: bool = True,
    action: Optional[str] = None,
) -> str:
    """Submit image-to-3d job and return JobId.

    Args:
        image_path: Local path of the input image.
        secret_id: Tencent Cloud SecretId; falls back to ``TENCENT_SECRET_ID``
            / ``TENCENTCLOUD_SECRET_ID``.
        secret_key: Tencent Cloud SecretKey; falls back to
            ``TENCENT_SECRET_KEY`` / ``TENCENTCLOUD_SECRET_KEY``.
        result_format: Requested output format, sent as ``ResultFormat``.
        enable_pbr: Sent as ``EnablePBR``.
        action: Submit action; falls back to env ``HUNYUAN_IMAGE3D_ACTION``,
            then ``"SubmitHunyuanTo3DProJob"``.

    Raises:
        ValueError: credentials are missing.
        FileNotFoundError: ``image_path`` is not an existing file.
        RuntimeError: the API returned an error or no ``JobId``.
    """
    secret_id, secret_key = _resolve_credentials(secret_id, secret_key)
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"image_path not found: {image_path}")

    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode("utf-8")

    payload = {
        "ImageBase64": image_base64,
        "ResultFormat": result_format,
        "EnablePBR": enable_pbr,
    }
    submit_action = action or os.getenv("HUNYUAN_IMAGE3D_ACTION") or "SubmitHunyuanTo3DProJob"
    result = _post_ai3d(payload, action=submit_action, secret_id=secret_id, secret_key=secret_key)

    job_id = result.get("Response", {}).get("JobId")
    if not job_id:
        # Surface the raw reply instead of an opaque KeyError.
        raise RuntimeError(f"Tencent API response has no JobId: {result}")
    return job_id


# Backward compatible alias
submit_hunyuan_image3D = hunyuan_image3d_submit


def main() -> None:
    """CLI entry point: submit one image and print the resulting JobId."""
    parser = argparse.ArgumentParser(description="Submit Hunyuan Image->3D job")
    parser.add_argument("--image-path", required=True, help="Input image local path")
    parser.add_argument("--secret-id", default=None, help="Tencent Cloud SecretId (or env TENCENT_SECRET_ID)")
    parser.add_argument("--secret-key", default=None, help="Tencent Cloud SecretKey (or env TENCENT_SECRET_KEY)")
    parser.add_argument("--result-format", default="GLB", choices=["GLB", "OBJ", "FBX"], help="Output format")
    parser.add_argument("--disable-pbr", action="store_true", help="Disable PBR material generation")
    args = parser.parse_args()

    job_id = hunyuan_image3d_submit(
        image_path=args.image_path,
        secret_id=args.secret_id,
        secret_key=args.secret_key,
        result_format=args.result_format,
        enable_pbr=not args.disable_pbr,
    )
    print(job_id)


if __name__ == "__main__":
    main()


# ---- apps/hunyuan_text3d_submit.py (new file) --------------------------------
# Submit a text-to-3D job to the Tencent Cloud Hunyuan3D (ai3d) API.
import argparse
import hashlib
import hmac
import json
import os
import time
from datetime import datetime, timezone
from http.client import HTTPSConnection
from typing import Any, Dict, Optional, Tuple

# Socket timeout (seconds) for the submit request.
_REQUEST_TIMEOUT_SECONDS = 60


def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of ``msg`` (UTF-8 encoded) under ``key``."""
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()


def _build_tc3_headers(
    payload: str,
    action: str,
    secret_id: str,
    secret_key: str,
    service: str = "ai3d",
    host: str = "ai3d.tencentcloudapi.com",
    region: str = "ap-guangzhou",
    version: str = "2025-05-13",
    token: str = "",
    timestamp: Optional[int] = None,
) -> Dict[str, str]:
    """Build TC3-HMAC-SHA256 signed request headers.

    Args:
        payload: Exact JSON body that will be sent (it is hashed into the
            signature, so it must match the bytes on the wire).
        action: API action name, sent as ``X-TC-Action``.
        secret_id: Tencent Cloud SecretId.
        secret_key: Tencent Cloud SecretKey.
        service: Service name used in the credential scope.
        host: API host, part of the signed headers.
        region: Value of ``X-TC-Region``.
        version: Value of ``X-TC-Version``.
        token: Optional temporary-credential token (``X-TC-Token``).
        timestamp: Unix seconds to sign with; defaults to the current time.

    Returns:
        Header dict ready to pass to ``HTTPSConnection.request``.
    """
    timestamp = int(time.time()) if timestamp is None else int(timestamp)
    date = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")

    canonical_headers = (
        "content-type:application/json; charset=utf-8\n"
        f"host:{host}\n"
        f"x-tc-action:{action.lower()}\n"
    )
    signed_headers = "content-type;host;x-tc-action"
    hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()

    canonical_request = (
        "POST\n/\n\n"
        f"{canonical_headers}\n"
        f"{signed_headers}\n"
        f"{hashed_request_payload}"
    )

    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = (
        "TC3-HMAC-SHA256\n"
        f"{timestamp}\n"
        f"{credential_scope}\n"
        f"{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}"
    )

    secret_date = _sign(("TC3" + secret_key).encode("utf-8"), date)
    secret_service = _sign(secret_date, service)
    secret_signing = _sign(secret_service, "tc3_request")
    signature = hmac.new(
        secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
    ).hexdigest()

    authorization = (
        "TC3-HMAC-SHA256 "
        f"Credential={secret_id}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, "
        f"Signature={signature}"
    )

    headers: Dict[str, str] = {
        "Authorization": authorization,
        "Content-Type": "application/json; charset=utf-8",
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }
    if token:
        headers["X-TC-Token"] = token
    return headers


def _post_ai3d(
    payload_obj: Dict[str, Any], action: str, secret_id: str, secret_key: str
) -> Dict[str, Any]:
    """POST a signed JSON request to the ai3d endpoint and return the reply.

    Raises:
        RuntimeError: the reply carries a ``Response.Error`` entry.
    """
    host = "ai3d.tencentcloudapi.com"
    payload = json.dumps(payload_obj, separators=(",", ":"), ensure_ascii=False)
    headers = _build_tc3_headers(payload, action, secret_id, secret_key, host=host)

    conn = HTTPSConnection(host, timeout=_REQUEST_TIMEOUT_SECONDS)
    try:
        conn.request("POST", "/", body=payload.encode("utf-8"), headers=headers)
        body = conn.getresponse().read().decode("utf-8")
    finally:
        conn.close()  # release the socket even when the request fails
    result = json.loads(body)

    if "Error" in result.get("Response", {}):
        raise RuntimeError(f"Tencent API Error: {result['Response']['Error']}")
    return result


def _resolve_credentials(
    secret_id: Optional[str], secret_key: Optional[str]
) -> Tuple[str, str]:
    """Return cleaned credentials, falling back to environment variables.

    Surrounding whitespace and quotes are stripped because values copied
    from shell ``export`` lines or ``.env`` files often still carry them.

    Raises:
        ValueError: either credential is missing after the fallbacks.
    """
    secret_id = (
        secret_id
        or os.getenv("TENCENT_SECRET_ID")
        or os.getenv("TENCENTCLOUD_SECRET_ID")
    )
    secret_key = (
        secret_key
        or os.getenv("TENCENT_SECRET_KEY")
        or os.getenv("TENCENTCLOUD_SECRET_KEY")
    )
    if secret_id:
        secret_id = secret_id.strip().strip("'\"")
    if secret_key:
        secret_key = secret_key.strip().strip("'\"")
    if not secret_id or not secret_key:
        raise ValueError("Missing credential: set secret_id/secret_key or env TENCENT_SECRET_ID/TENCENT_SECRET_KEY")
    return secret_id, secret_key


def hunyuan_text3d_submit(
    prompt: str,
    secret_id: Optional[str] = None,
    secret_key: Optional[str] = None,
    result_format: str = "GLB",
    enable_pbr: bool = True,
    action: Optional[str] = None,
) -> str:
    """Submit text-to-3d job and return JobId.

    Args:
        prompt: Text description of the asset; must not be blank.
        secret_id: Tencent Cloud SecretId; falls back to ``TENCENT_SECRET_ID``
            / ``TENCENTCLOUD_SECRET_ID``.
        secret_key: Tencent Cloud SecretKey; falls back to
            ``TENCENT_SECRET_KEY`` / ``TENCENTCLOUD_SECRET_KEY``.
        result_format: Requested output format, sent as ``ResultFormat``.
        enable_pbr: Sent as ``EnablePBR``.
        action: Submit action; falls back to env ``HUNYUAN_TEXT3D_ACTION``,
            then ``"SubmitHunyuanTo3DProJob"``.

    Raises:
        ValueError: credentials are missing or ``prompt`` is empty/blank.
        RuntimeError: the API returned an error or no ``JobId``.
    """
    secret_id, secret_key = _resolve_credentials(secret_id, secret_key)
    # ``None`` is treated like an empty prompt instead of crashing on .strip().
    if not prompt or not prompt.strip():
        raise ValueError("prompt cannot be empty")

    payload = {
        "Prompt": prompt,
        "ResultFormat": result_format,
        "EnablePBR": enable_pbr,
    }
    submit_action = action or os.getenv("HUNYUAN_TEXT3D_ACTION") or "SubmitHunyuanTo3DProJob"
    result = _post_ai3d(payload, action=submit_action, secret_id=secret_id, secret_key=secret_key)

    job_id = result.get("Response", {}).get("JobId")
    if not job_id:
        # Surface the raw reply instead of an opaque KeyError.
        raise RuntimeError(f"Tencent API response has no JobId: {result}")
    return job_id


# Backward compatible alias
submit_hunyuan_text3D = hunyuan_text3d_submit


def main() -> None:
    """CLI entry point: submit one prompt and print the resulting JobId."""
    parser = argparse.ArgumentParser(description="Submit Hunyuan Text->3D job")
    parser.add_argument("--prompt", required=True, help="Text prompt for 3D generation")
    parser.add_argument("--secret-id", default=None, help="Tencent Cloud SecretId (or env TENCENT_SECRET_ID)")
    parser.add_argument("--secret-key", default=None, help="Tencent Cloud SecretKey (or env TENCENT_SECRET_KEY)")
    parser.add_argument("--result-format", default="GLB", choices=["GLB", "OBJ", "FBX"], help="Output format")
    parser.add_argument("--disable-pbr", action="store_true", help="Disable PBR material generation")
    args = parser.parse_args()

    job_id = hunyuan_text3d_submit(
        prompt=args.prompt,
        secret_id=args.secret_id,
        secret_key=args.secret_key,
        result_format=args.result_format,
        enable_pbr=not args.disable_pbr,
    )
    print(job_id)


if __name__ == "__main__":
    main()


# ---- apps/image_to_3d_api.py (new file; its code continues after this span) --
# Project EmbodiedGen
#
# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
+ + +import os +import random +from pathlib import Path +import tempfile + +# GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default. +# GRADIO_APP == "imageto3d", TRELLIS model. +#os.environ["GRADIO_APP"] = "imageto3d_sam3d" +os.environ["GRADIO_APP"] = "imageto3d" + +# Keep Gradio temp/cache under project workspace to avoid /tmp permission issues. +_gradio_tmp_root = Path(__file__).resolve().parent.parent / "tmp" / "gradio" +_gradio_tmp_root.mkdir(parents=True, exist_ok=True) +os.environ.setdefault("GRADIO_TEMP_DIR", str(_gradio_tmp_root)) +os.environ.setdefault("TMPDIR", str(_gradio_tmp_root)) +tempfile.tempdir = str(_gradio_tmp_root) +from glob import glob + +import gradio as gr +import trimesh +from app_style import custom_theme, image_css, lighting_css +from embodied_gen.utils.tags import VERSION +from hunyuan_image3d_save import query_and_download_hunyuan_job +from hunyuan_image3d_submit import hunyuan_image3d_submit + +app_name = os.getenv("GRADIO_APP") +if app_name == "imageto3d_sam3d": + enable_pre_resize = False + sample_step = 25 + bg_rm_model_name = "rembg" # "rembg", "rmbg14" +elif app_name == "imageto3d": + enable_pre_resize = True + sample_step = 12 + bg_rm_model_name = "rembg" # "rembg", "rmbg14" + +MAX_SEED = 100000 +SESSION_ROOT = Path("./tmp/gradio_sessions") +SESSION_ROOT.mkdir(parents=True, exist_ok=True) + + +def start_session(req: gr.Request) -> None: + session_hash = req.session_hash if req is not None else "default" + (SESSION_ROOT / str(session_hash)).mkdir(parents=True, exist_ok=True) + + +def end_session(req: gr.Request) -> None: + # Keep lightweight API mode side-effect free; no mandatory cleanup here. 
+ return None + + +def active_btn_by_content(content) -> gr.Button: + return gr.Button(interactive=content is not None) + + +def get_seed(randomize_seed: bool, seed: int, max_seed: int = MAX_SEED) -> int: + return random.randint(0, max_seed) if randomize_seed else seed + + +def preprocess_image_fn(image, rmbg_tag: str = "rembg", preprocess: bool = True): + # API-only mode: avoid loading local segmentation/rembg models at startup. + if image is None: + return None, None + image_cache = image.copy() if hasattr(image, "copy") else image + return image, image_cache + + +def preprocess_sam_image_fn(image): + # API-only mode: SAM model is not loaded; keep input passthrough. + if image is None: + return None, None + image_cache = image.copy() if hasattr(image, "copy") else image + return image, image_cache + + +def select_point(image, sel_pix, point_type, evt: gr.SelectData): + # API-only mode: no SAM interaction; return current image unchanged. + return (image, None), image + + +def extract_urdf_lazy( + gs_path: str, + mesh_obj_path: str, + asset_cat_text: str, + height_range_text: str, + mass_range_text: str, + asset_version_text: str, + req: gr.Request = None, +): + from common import extract_urdf as _extract_urdf + if (not mesh_obj_path) and gs_path: + session_hash = req.session_hash if req is not None else "default" + obj_dir = Path("./tmp/gradio") / str(session_hash) / "hunyuan_image3d" / "obj" + mesh_obj_path = convert_glb_to_obj(gs_path, str(obj_dir)) + + return _extract_urdf( + gs_path, + mesh_obj_path, + asset_cat_text, + height_range_text, + mass_range_text, + asset_version_text, + req, + ) + + +def image_to_3d_via_hunyuan_api( + image, + raw_image_cache, + seed, + req: gr.Request = None, +): + if image is None: + raise ValueError("Input image is empty") + + secret_id = os.getenv("TENCENT_SECRET_ID") + secret_key = os.getenv("TENCENT_SECRET_KEY") + + if not secret_id or not secret_key: + raise ValueError( + "Missing credentials: set TENCENT_SECRET_ID and 
TENCENT_SECRET_KEY" + ) + + session_hash = req.session_hash if req is not None else "default" + output_root = Path("./tmp/gradio") / str(session_hash) / "hunyuan_image3d" + output_root.mkdir(parents=True, exist_ok=True) + + image_path = output_root / f"input_{seed}.png" + image.save(image_path) + if raw_image_cache is not None: + raw_image_cache.save(output_root / "raw_image.png") + + glb_job_id = hunyuan_image3d_submit( + image_path=str(image_path), + secret_id=secret_id, + secret_key=secret_key, + result_format="GLB", + ) + # glb_job_id = '1444588221266214912' + # print("glb_job_id:", glb_job_id) + glb_path, preview_path = query_and_download_hunyuan_job( + job_id=glb_job_id, + save_dir=str(output_root / "glb"), + secret_id=secret_id, + secret_key=secret_key, + ) + + return ( + glb_path, + None, + glb_path, + ) + + +def convert_glb_to_obj(glb_path: str, obj_dir: str) -> str: + obj_dir_path = Path(obj_dir) + obj_dir_path.mkdir(parents=True, exist_ok=True) + obj_path = obj_dir_path / "result.obj" + + loaded = trimesh.load(glb_path, force="scene") + if isinstance(loaded, trimesh.Scene): + mesh = loaded.dump(concatenate=True) + else: + mesh = loaded + mesh.export(obj_path) + + return str(obj_path.resolve()) + +with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo: + gr.HTML(image_css, visible=False) + gr.HTML(lighting_css, visible=False) + gr.Markdown( + """ + ## ***EmbodiedGen***: Image-to-3D Asset + **🔖 Version**: {VERSION} +

+ + 📖 Documentation + + + 📄 arXiv + + + 💻 GitHub + + + 🎥 Video + +

+ + 🖼️ Generate physically plausible 3D asset from single input image. + """.format( + VERSION=VERSION + ), + elem_classes=["header"], + ) + + with gr.Row(): + with gr.Column(scale=3): + with gr.Tabs() as input_tabs: + with gr.Tab( + label="Image(auto seg)", id=0 + ) as single_image_input_tab: + raw_image_cache = gr.Image( + format="png", + image_mode="RGB", + type="pil", + visible=False, + ) + image_prompt = gr.Image( + label="Input Image", + format="png", + image_mode="RGBA", + type="pil", + height=400, + elem_classes=["image_fit"], + ) + gr.Markdown( + """ + If you are not satisfied with the auto segmentation + result, please switch to the `Image(SAM seg)` tab.""" + ) + with gr.Tab( + label="Image(SAM seg)", id=1 + ) as samimage_input_tab: + with gr.Row(): + with gr.Column(scale=1): + image_prompt_sam = gr.Image( + label="Input Image", + type="numpy", + height=400, + elem_classes=["image_fit"], + ) + image_seg_sam = gr.Image( + label="SAM Seg Image", + image_mode="RGBA", + type="pil", + height=400, + visible=False, + ) + with gr.Column(scale=1): + image_mask_sam = gr.AnnotatedImage( + elem_classes=["image_fit"] + ) + + fg_bg_radio = gr.Radio( + ["foreground_point", "background_point"], + label="Select foreground(green) or background(red) points, by default foreground", # noqa + value="foreground_point", + ) + gr.Markdown( + """ Click the `Input Image` to select SAM points, + after get the satisified segmentation, click `Generate` + button to generate the 3D asset. \n + Note: If the segmented foreground is too small relative + to the entire image area, the generation will fail. 
+ """ + ) + + with gr.Accordion(label="Generation Settings", open=False): + with gr.Row(): + seed = gr.Slider( + 0, MAX_SEED, label="Seed", value=0, step=1 + ) + texture_size = gr.Slider( + 1024, + 4096, + label="UV texture size", + value=2048, + step=256, + ) + rmbg_tag = gr.Radio( + choices=["rembg", "rmbg14"], + value=bg_rm_model_name, + label="Background Removal Model", + ) + with gr.Row(): + randomize_seed = gr.Checkbox( + label="Randomize Seed", value=False + ) + project_delight = gr.Checkbox( + label="Back-project Delight", + value=True, + ) + gr.Markdown("Geo Structure Generation") + with gr.Row(): + ss_guidance_strength = gr.Slider( + 0.0, + 10.0, + label="Guidance Strength", + value=7.5, + step=0.1, + ) + ss_sampling_steps = gr.Slider( + 1, + 50, + label="Sampling Steps", + value=sample_step, + step=1, + ) + gr.Markdown("Visual Appearance Generation") + with gr.Row(): + slat_guidance_strength = gr.Slider( + 0.0, + 10.0, + label="Guidance Strength", + value=3.0, + step=0.1, + ) + slat_sampling_steps = gr.Slider( + 1, + 50, + label="Sampling Steps", + value=sample_step, + step=1, + ) + + generate_btn = gr.Button( + "🚀 1. Generate(~2 mins)", + variant="primary", + interactive=False, + ) + model_output_obj = gr.Textbox(label="raw mesh .obj", visible=False) + # with gr.Row(): + # extract_rep3d_btn = gr.Button( + # "🔍 2. Extract 3D Representation(~2 mins)", + # variant="primary", + # interactive=False, + # ) + with gr.Accordion( + label="Enter Asset Attributes(optional)", open=False + ): + asset_cat_text = gr.Textbox( + label="Enter Asset Category (e.g., chair)" + ) + height_range_text = gr.Textbox( + label="Enter **Height Range** in meter (e.g., 0.5-0.6)" + ) + mass_range_text = gr.Textbox( + label="Enter **Mass Range** in kg (e.g., 1.1-1.2)" + ) + asset_version_text = gr.Textbox( + label=f"Enter version (e.g., {VERSION})" + ) + with gr.Row(): + extract_urdf_btn = gr.Button( + "🧩 2. 
Extract URDF with physics(~1 mins)", + variant="primary", + interactive=False, + ) + with gr.Row(): + gr.Markdown( + "#### Estimated Asset 3D Attributes(No input required)" + ) + with gr.Row(): + est_type_text = gr.Textbox( + label="Asset category", interactive=False + ) + est_height_text = gr.Textbox( + label="Real height(.m)", interactive=False + ) + est_mass_text = gr.Textbox( + label="Mass(.kg)", interactive=False + ) + est_mu_text = gr.Textbox( + label="Friction coefficient", interactive=False + ) + with gr.Row(): + download_urdf = gr.DownloadButton( + label="⬇️ 3. Download URDF", + variant="primary", + interactive=False, + ) + + gr.Markdown( + """ NOTE: If `Asset Attributes` are provided, it will guide + GPT to perform physical attributes restoration. \n + The `Download URDF` file is restored to the real scale and + has quality inspection, open with an editor to view details. + """ + ) + enable_pre_resize = gr.State(enable_pre_resize) + with gr.Row() as single_image_example: + examples = gr.Examples( + label="Image Gallery", + examples=[ + [image_path] + for image_path in sorted( + glob("apps/assets/example_image/*") + ) + ], + inputs=[image_prompt, rmbg_tag, enable_pre_resize], + fn=preprocess_image_fn, + outputs=[image_prompt, raw_image_cache], + run_on_click=True, + examples_per_page=10, + ) + + with gr.Row(visible=False) as single_sam_image_example: + examples = gr.Examples( + label="Image Gallery", + examples=[ + [image_path] + for image_path in sorted( + glob("apps/assets/example_image/*") + ) + ], + inputs=[image_prompt_sam], + fn=preprocess_sam_image_fn, + outputs=[image_prompt_sam, raw_image_cache], + run_on_click=True, + examples_per_page=10, + ) + with gr.Column(scale=2): + gr.Markdown("
") + generated_model_output = gr.Model3D( + label="Generated 3D Asset", + height=700, + interactive=False, + ) + + is_samimage = gr.State(False) + output_buf = gr.State() + selected_points = gr.State(value=[]) + + demo.load(start_session) + demo.unload(end_session) + + single_image_input_tab.select( + lambda: tuple( + [False, gr.Row.update(visible=True), gr.Row.update(visible=False)] + ), + outputs=[is_samimage, single_image_example, single_sam_image_example], + ) + samimage_input_tab.select( + lambda: tuple( + [True, gr.Row.update(visible=True), gr.Row.update(visible=False)] + ), + outputs=[is_samimage, single_sam_image_example, single_image_example], + ) + + image_prompt.upload( + preprocess_image_fn, + inputs=[image_prompt, rmbg_tag, enable_pre_resize], + outputs=[image_prompt, raw_image_cache], + ) + image_prompt.change( + lambda: tuple( + [ + # gr.Button(interactive=False), + gr.Button(interactive=False), + gr.Button(interactive=False), + None, + None, + "", + "", + "", + "", + "", + "", + "", + "", + ] + ), + outputs=[ + # extract_rep3d_btn, + extract_urdf_btn, + download_urdf, + generated_model_output, + model_output_obj, + asset_cat_text, + height_range_text, + mass_range_text, + asset_version_text, + est_type_text, + est_height_text, + est_mass_text, + est_mu_text, + ], + ) + image_prompt.change( + active_btn_by_content, + inputs=image_prompt, + outputs=generate_btn, + ) + + image_prompt_sam.upload( + preprocess_sam_image_fn, + inputs=[image_prompt_sam], + outputs=[image_prompt_sam, raw_image_cache], + ) + image_prompt_sam.change( + lambda: tuple( + [ + # gr.Button(interactive=False), + gr.Button(interactive=False), + gr.Button(interactive=False), + None, + "", + "", + "", + "", + "", + "", + "", + "", + None, + [], + ] + ), + outputs=[ + # extract_rep3d_btn, + extract_urdf_btn, + download_urdf, + generated_model_output, + model_output_obj, + asset_cat_text, + height_range_text, + mass_range_text, + asset_version_text, + est_type_text, + est_height_text, + 
est_mass_text, + est_mu_text, + image_mask_sam, + selected_points, + ], + ) + + image_prompt_sam.select( + select_point, + [ + image_prompt_sam, + selected_points, + fg_bg_radio, + ], + [image_mask_sam, image_seg_sam], + ) + image_seg_sam.change( + active_btn_by_content, + inputs=image_seg_sam, + outputs=generate_btn, + ) + + generate_btn.click( + get_seed, + inputs=[randomize_seed, seed], + outputs=[seed], + ).success( + image_to_3d_via_hunyuan_api, + inputs=[ + image_prompt, + raw_image_cache, + seed, + ], + outputs=[ + generated_model_output, + model_output_obj, + output_buf, + ], + ).success( + lambda: gr.Button(interactive=True), + outputs=[extract_urdf_btn], + ) + + extract_urdf_btn.click( + extract_urdf_lazy, + inputs=[ + output_buf, + model_output_obj, + asset_cat_text, + height_range_text, + mass_range_text, + asset_version_text, + ], + outputs=[ + download_urdf, + est_type_text, + est_height_text, + est_mass_text, + est_mu_text, + ], + queue=True, + show_progress="full", + ).success( + lambda: gr.Button(interactive=True), + outputs=[download_urdf], + ) + + +if __name__ == "__main__": + demo.launch(server_port=8542) diff --git a/apps/text_to_3d_api.py b/apps/text_to_3d_api.py new file mode 100644 index 0000000..af0b979 --- /dev/null +++ b/apps/text_to_3d_api.py @@ -0,0 +1,353 @@ +# Project EmbodiedGen +# +# Copyright (c) 2025 Horizon Robotics. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
import os
import random
import tempfile
from pathlib import Path

import gradio as gr
import trimesh
from app_style import custom_theme, image_css, lighting_css
from embodied_gen.utils.tags import VERSION
from hunyuan_image3d_save import query_and_download_hunyuan_job
from hunyuan_text3d_submit import hunyuan_text3d_submit

# GRADIO_APP == "textto3d_sam3d", sam3d object model, by default.
# GRADIO_APP == "textto3d", TRELLIS model.
os.environ["GRADIO_APP"] = "textto3d_sam3d"

# Keep Gradio temp/cache under project workspace to avoid /tmp permission
# issues.
_gradio_tmp_root = Path(__file__).resolve().parent.parent / "tmp" / "gradio"
_gradio_tmp_root.mkdir(parents=True, exist_ok=True)
os.environ.setdefault("GRADIO_TEMP_DIR", str(_gradio_tmp_root))
os.environ.setdefault("TMPDIR", str(_gradio_tmp_root))
tempfile.tempdir = str(_gradio_tmp_root)

MAX_SEED = 100000
SESSION_ROOT = Path("./tmp/gradio_sessions")
SESSION_ROOT.mkdir(parents=True, exist_ok=True)

# Root (relative to the working directory) under which per-session
# generation outputs are written.
_OUTPUT_ROOT = Path("./tmp/gradio")

# Default value for the "Sampling Steps" sliders; depends on the model
# selected through GRADIO_APP above.
app_name = os.getenv("GRADIO_APP")
if app_name == "textto3d_sam3d":
    sample_step = 25
else:
    sample_step = 12


def _session_hash(req) -> str:
    """Return the session hash of `req`, or "default" when `req` is None."""
    return str(req.session_hash) if req is not None else "default"


def _session_output_dir(req) -> Path:
    """Return the per-session directory used for Hunyuan text-to-3D files."""
    return _OUTPUT_ROOT / _session_hash(req) / "hunyuan_text3d"


def start_session(req: gr.Request) -> None:
    """Create the session directory under SESSION_ROOT on page load.

    Args:
        req: Gradio request; its `session_hash` names the directory.
            "default" is used when `req` is None.
    """
    (SESSION_ROOT / _session_hash(req)).mkdir(parents=True, exist_ok=True)


def end_session(req: gr.Request) -> None:
    """Handle page unload; intentionally does nothing."""
    return None


def active_btn_by_text_content(content: str) -> gr.Button:
    """Return a button update that is interactive only for non-blank text.

    Args:
        content: Current text of the prompt box; None is treated as empty.
    """
    return gr.Button(interactive=bool((content or "").strip()))


def get_seed(randomize_seed: bool, seed: int, max_seed: int = MAX_SEED) -> int:
    """Return a random seed in [0, max_seed] or pass `seed` through.

    Args:
        randomize_seed: When True, draw a new random seed.
        seed: Seed to return when `randomize_seed` is False.
        max_seed: Inclusive upper bound for the random seed.
    """
    return random.randint(0, max_seed) if randomize_seed else seed


def text_to_3d_via_hunyuan_api(prompt: str, seed: int, req: gr.Request = None):
    """Generate a GLB model from a text prompt through the Hunyuan 3D API.

    Args:
        prompt: Text description of the asset; surrounding whitespace is
            stripped.
        seed: Only used to name the output directory.
        req: Gradio request used to pick the per-session output directory.

    Returns:
        A `(model_path, model_path)` pair: the same downloaded model path
        twice, one entry per Gradio output component.

    Raises:
        ValueError: If the prompt is blank, or TENCENT_SECRET_ID /
            TENCENT_SECRET_KEY is not set in the environment.
    """
    prompt = (prompt or "").strip()
    if not prompt:
        raise ValueError("Text prompt is empty")

    secret_id = os.getenv("TENCENT_SECRET_ID")
    secret_key = os.getenv("TENCENT_SECRET_KEY")
    if not secret_id or not secret_key:
        raise ValueError(
            "Missing credentials: set TENCENT_SECRET_ID and TENCENT_SECRET_KEY"
        )

    output_root = _session_output_dir(req)
    output_root.mkdir(parents=True, exist_ok=True)

    job_id = hunyuan_text3d_submit(
        prompt=prompt,
        secret_id=secret_id,
        secret_key=secret_key,
        result_format="GLB",
    )

    # Use a fresh directory per job. A fixed "glb_<seed>" directory is shared
    # by every generation with the same seed (the default is 0), so files
    # left by an earlier prompt could be picked up instead of the new result.
    save_dir = tempfile.mkdtemp(prefix=f"glb_{seed}_", dir=str(output_root))

    # The second element of the returned pair is not needed here.
    model_path, _ = query_and_download_hunyuan_job(
        job_id=job_id,
        save_dir=save_dir,
        secret_id=secret_id,
        secret_key=secret_key,
    )
    return model_path, model_path


def convert_glb_to_obj(glb_path: str, obj_dir: str) -> str:
    """Convert a GLB file into a single OBJ mesh named `result.obj`.

    Args:
        glb_path: Path of the GLB file to load.
        obj_dir: Directory to write `result.obj` into; created if missing.

    Returns:
        Absolute path of the exported OBJ file.

    Raises:
        ValueError: If the loaded scene contains no geometry.
    """
    obj_dir_path = Path(obj_dir)
    obj_dir_path.mkdir(parents=True, exist_ok=True)
    obj_path = obj_dir_path / "result.obj"

    loaded = trimesh.load(glb_path, force="scene")
    if isinstance(loaded, trimesh.Scene):
        if not loaded.geometry:
            # Fail with a clear message instead of an opaque export error.
            raise ValueError(f"No mesh geometry found in {glb_path}")
        # Merge all scene geometry into one mesh before exporting.
        mesh = loaded.dump(concatenate=True)
    else:
        mesh = loaded
    mesh.export(obj_path)

    return str(obj_path.resolve())


def extract_urdf_lazy(
    glb_path: str,
    mesh_obj_path: str,
    asset_cat_text: str,
    height_range_text: str,
    mass_range_text: str,
    asset_version_text: str,
    req: gr.Request = None,
):
    """Run `common.extract_urdf`, converting the GLB to OBJ first if needed.

    `common` is imported inside the function so that it is only loaded when
    URDF extraction is actually requested.

    Args:
        glb_path: Path of the generated GLB model.
        mesh_obj_path: Path of the OBJ mesh; when empty and `glb_path` is
            set, the OBJ is produced from the GLB.
        asset_cat_text: User-supplied asset category.
        height_range_text: User-supplied height range text.
        mass_range_text: User-supplied mass range text.
        asset_version_text: User-supplied version text.
        req: Gradio request, forwarded to `extract_urdf`.

    Returns:
        Whatever `common.extract_urdf` returns.
    """
    from common import extract_urdf as _extract_urdf

    if (not mesh_obj_path) and glb_path:
        obj_dir = _session_output_dir(req) / "obj"
        mesh_obj_path = convert_glb_to_obj(glb_path, str(obj_dir))

    return _extract_urdf(
        glb_path,
        mesh_obj_path,
        asset_cat_text,
        height_range_text,
        mass_range_text,
        asset_version_text,
        req,
    )


def _reset_generation_outputs() -> tuple:
    """Return updates that clear previous results when the prompt changes.

    The tuple holds, in order: two disabled buttons, None for the 3D viewer,
    and nine empty strings for the text fields.
    """
    cleared_text = [""] * 9
    return (
        gr.Button(interactive=False),
        gr.Button(interactive=False),
        None,
        *cleared_text,
    )


with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
    gr.HTML(image_css, visible=False)
    gr.HTML(lighting_css, visible=False)
    gr.Markdown(
        """
        ## ***EmbodiedGen***: Text-to-3D Asset
        **🔖 Version**: {VERSION}

        📝 Create 3D assets from text descriptions.
        """.format(
            VERSION=VERSION
        ),
        elem_classes=["header"],
    )

    with gr.Row():
        with gr.Column(scale=3):
            text_prompt = gr.Textbox(
                label="Text Prompt (Chinese or English)",
                placeholder="Input text prompt here",
            )

            # Apart from the seed controls, the widgets in this accordion
            # are not wired to any event handler below.
            with gr.Accordion(label="Generation Settings", open=False):
                with gr.Row():
                    seed = gr.Slider(
                        0, MAX_SEED, label="Seed", value=0, step=1
                    )
                    texture_size = gr.Slider(
                        1024,
                        4096,
                        label="UV texture size",
                        value=2048,
                        step=256,
                    )
                with gr.Row():
                    randomize_seed = gr.Checkbox(
                        label="Randomize Seed", value=False
                    )
                    project_delight = gr.Checkbox(
                        label="Back-project Delight", value=True
                    )
                gr.Markdown("Geo Structure Generation")
                with gr.Row():
                    ss_guidance_strength = gr.Slider(
                        0.0,
                        10.0,
                        label="Guidance Strength",
                        value=7.5,
                        step=0.1,
                    )
                    ss_sampling_steps = gr.Slider(
                        1,
                        50,
                        label="Sampling Steps",
                        value=sample_step,
                        step=1,
                    )
                gr.Markdown("Visual Appearance Generation")
                with gr.Row():
                    slat_guidance_strength = gr.Slider(
                        0.0,
                        10.0,
                        label="Guidance Strength",
                        value=3.0,
                        step=0.1,
                    )
                    slat_sampling_steps = gr.Slider(
                        1,
                        50,
                        label="Sampling Steps",
                        value=sample_step,
                        step=1,
                    )

            generate_btn = gr.Button(
                "🚀 1. Generate 3D(~2 mins)",
                variant="primary",
                interactive=False,
            )
            model_output_obj = gr.Textbox(
                label="raw mesh .obj", visible=False
            )

            with gr.Accordion(
                label="Enter Asset Attributes(optional)", open=False
            ):
                asset_cat_text = gr.Textbox(
                    label="Enter Asset Category (e.g., chair)"
                )
                height_range_text = gr.Textbox(
                    label="Enter Height Range in meter (e.g., 0.5-0.6)"
                )
                mass_range_text = gr.Textbox(
                    label="Enter Mass Range in kg (e.g., 1.1-1.2)"
                )
                asset_version_text = gr.Textbox(
                    label=f"Enter version (e.g., {VERSION})"
                )
            with gr.Row():
                extract_urdf_btn = gr.Button(
                    "🧩 2. Extract URDF with physics(~1 mins)",
                    variant="primary",
                    interactive=False,
                )

            gr.Markdown("Estimated Asset 3D Attributes(No input required)")
            with gr.Row():
                est_type_text = gr.Textbox(
                    label="Asset category", interactive=False
                )
                est_height_text = gr.Textbox(
                    label="Real height(.m)", interactive=False
                )
                est_mass_text = gr.Textbox(
                    label="Mass(.kg)", interactive=False
                )
                est_mu_text = gr.Textbox(
                    label="Friction coefficient", interactive=False
                )

            with gr.Row():
                download_urdf = gr.DownloadButton(
                    label="⬇️ 3. Download URDF",
                    variant="primary",
                    interactive=False,
                )

            prompt_examples = [
                "satin gold tea cup with saucer",
                "small bronze figurine of a lion",
                "brown leather bag",
                "Miniature cup with floral design",
                "带木质底座, 具有经纬线的地球仪",
                "橙色电动手钻, 有磨损细节",
                "手工制作的皮革笔记本",
            ]
            examples = gr.Examples(
                label="Gallery",
                examples=prompt_examples,
                inputs=[text_prompt],
                examples_per_page=10,
            )

        with gr.Column(scale=2):
            generated_model_output = gr.Model3D(
                label="Generated 3D Asset",
                height=700,
                interactive=False,
            )

    # Holds the path of the generated GLB between the two button steps.
    model_output_glb = gr.State("")

    demo.load(start_session)
    demo.unload(end_session)

    # Enable the generate button only while the prompt is non-blank.
    text_prompt.change(
        active_btn_by_text_content,
        inputs=[text_prompt],
        outputs=[generate_btn],
    )

    # Any prompt edit invalidates earlier results: disable the follow-up
    # buttons and clear the viewer and text fields.
    text_prompt.change(
        _reset_generation_outputs,
        outputs=[
            extract_urdf_btn,
            download_urdf,
            generated_model_output,
            model_output_obj,
            asset_cat_text,
            height_range_text,
            mass_range_text,
            asset_version_text,
            est_type_text,
            est_height_text,
            est_mass_text,
            est_mu_text,
        ],
    )

    # Step 1: resolve the seed, generate the model, then unlock step 2.
    generate_btn.click(
        get_seed,
        inputs=[randomize_seed, seed],
        outputs=[seed],
    ).success(
        text_to_3d_via_hunyuan_api,
        inputs=[text_prompt, seed],
        outputs=[generated_model_output, model_output_glb],
    ).success(
        lambda: gr.Button(interactive=True),
        outputs=[extract_urdf_btn],
    )

    # Step 2: extract the URDF, then unlock the download button.
    extract_urdf_btn.click(
        extract_urdf_lazy,
        inputs=[
            model_output_glb,
            model_output_obj,
            asset_cat_text,
            height_range_text,
            mass_range_text,
            asset_version_text,
        ],
        outputs=[
            download_urdf,
            est_type_text,
            est_height_text,
            est_mass_text,
            est_mu_text,
        ],
        queue=True,
        show_progress="full",
    ).success(
        lambda: gr.Button(interactive=True),
        outputs=[download_urdf],
    )


if __name__ == "__main__":
    demo.launch(server_port=8082)