Option to return original prompts instead of phrases
Currently, LangSAM returns `phrases` that do not map 1-1 back to the original text prompts specified by the caller.

This update adds a `return_prompts: bool` argument to the `LangSAM` constructor to enable that mapping.

By default, the original behaviour is preserved.
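The change follows a simple plumbing pattern: store a constructor flag and forward it to the downstream GroundingDINO `predict` call as `remove_combined`. A minimal sketch of that pattern, using hypothetical stand-in classes (`FakeDino` and `MiniLangSAM` are illustrative only, not part of `lang_sam`):

```python
class FakeDino:
    """Stand-in for GroundingDINO's predict; records the flag it receives."""

    def predict(self, caption, remove_combined=False):
        return {"caption": caption, "remove_combined": remove_combined}


class MiniLangSAM:
    """Illustrative sketch of the wiring added in this commit."""

    def __init__(self, return_prompts: bool = False):
        # Flag is stored at construction time...
        self.return_prompts = return_prompts
        self.dino = FakeDino()

    def predict_dino(self, text_prompt):
        # ...and forwarded on every call, so detected phrases can be
        # mapped 1-1 back to the caller's original prompts.
        return self.dino.predict(
            caption=text_prompt,
            remove_combined=self.return_prompts,
        )
```

With `return_prompts=True` the flag reaches the detector unchanged; with the default `False`, behaviour is identical to before the commit.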
dolhasz committed Apr 12, 2024
1 parent f4ca296 commit a1a9557
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lang_sam/lang_sam.py
@@ -49,8 +49,9 @@ def transform_image(image) -> torch.Tensor:

 class LangSAM():

-    def __init__(self, sam_type="vit_h", ckpt_path=None):
+    def __init__(self, sam_type="vit_h", ckpt_path=None, return_prompts: bool = False):
         self.sam_type = sam_type
+        self.return_prompts = return_prompts
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.build_groundingdino()
         self.build_sam(ckpt_path)
@@ -94,6 +95,7 @@ def predict_dino(self, image_pil, text_prompt, box_threshold, text_threshold):
             caption=text_prompt,
             box_threshold=box_threshold,
             text_threshold=text_threshold,
+            remove_combined=self.return_prompts,
             device=self.device)
         W, H = image_pil.size
         boxes = box_ops.box_cxcywh_to_xyxy(boxes) * torch.Tensor([W, H, W, H])
