From d5d9fd11b38481927171c6ee3e7d9d6b5b713d79 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 30 Nov 2023 15:37:41 +0100
Subject: [PATCH] [tests] resolve skipped HF tests: 1st batch (#21305)

* initial

* some corrections for the first batch

* corrected classes for generate, corrected xfail

* leave only models with correct example input

* remove an obsolete line

* Update tests/model_hub_tests/torch_tests/test_hf_transformers.py

Co-authored-by: Maxim Vafin

* update musicgen

* cleanup test_hf_transformers.py

* typo fix

* Update tests/model_hub_tests/torch_tests/test_hf_transformers.py

* Update tests/model_hub_tests/torch_tests/test_hf_transformers.py

* move to up: corrected xfail

* revert back accidentally deleted elif

* Update tests/model_hub_tests/torch_tests/test_hf_transformers.py

* Update tests/model_hub_tests/torch_tests/test_hf_transformers.py

---------

Co-authored-by: Maxim Vafin
---
 .../torch_tests/hf_transformers_models  |  6 +--
 .../torch_tests/test_hf_transformers.py | 41 +++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models
index b63e9eae171..cf699be6289 100644
--- a/tests/model_hub_tests/torch_tests/hf_transformers_models
+++ b/tests/model_hub_tests/torch_tests/hf_transformers_models
@@ -74,8 +74,8 @@ facebook/flava-image-codebook,flava_image_codebook,skip,Load problem
 facebook/m2m100_418M,m2m_100
 facebook/mask2former-swin-base-coco-panoptic,mask2former
 facebook/maskformer-swin-base-coco,maskformer
-facebook/mms-tts-eng,vits,skip,Load problem
-facebook/musicgen-small,musicgen,skip,Load problem
+facebook/mms-tts-eng,vits,xfail,Accuracy failed: results cannot be broadcasted
+facebook/musicgen-small,musicgen
 facebook/opt-125m,opt
 facebook/rag-token-nq,rag,skip,Load problem
 facebook/sam-vit-large,sam,xfail,No node with name original_sizes
@@ -104,7 +104,7 @@ google/mobilebert-uncased,mobilebert
 google/mobilenet_v1_0.75_192,mobilenet_v1
 google/mt5-base,mt5
 google/owlvit-base-patch32,owlvit
-google/pix2struct-docvqa-base,pix2struct,skip,Load problem
+google/pix2struct-docvqa-base,pix2struct
 google/realm-orqa-nq-openqa,realm,skip,Load problem
 google/reformer-crime-and-punishment,reformer,xfail,Tracing problem
 google/tapas-large-finetuned-wtq,tapas
diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py
index 6ad74a8ffb5..e13fa5a6b9c 100644
--- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py
+++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py
@@ -88,6 +88,26 @@ class TestTransformersModel(TestTorchConvertModel):
 
             model = VIT_GPT2_Model(model)
             example = (encoded_input.pixel_values,)
+        elif 'pix2struct' in mi.tags:
+            from transformers import AutoProcessor, Pix2StructForConditionalGeneration
+            model = Pix2StructForConditionalGeneration.from_pretrained(name)
+            processor = AutoProcessor.from_pretrained(name)
+
+            import requests
+            from PIL import Image
+            image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
+            image = Image.open(requests.get(image_url, stream=True).raw)
+            question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
+            inputs = processor(images=image, text=question, return_tensors="pt")
+            example = dict(inputs)
+
+            class DecoratorModelForSeq2SeqLM(torch.nn.Module):
+                def __init__(self, model):
+                    super().__init__()
+                    self.model = model
+                def forward(self, flattened_patches, attention_mask):
+                    return self.model.generate(flattened_patches=flattened_patches, attention_mask=attention_mask)
+            model = DecoratorModelForSeq2SeqLM(model)
         elif "mms-lid" in name:
             # mms-lid model config does not have auto_model attribute, only direct loading available
             from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor
@@ -149,6 +169,27 @@ class TestTransformersModel(TestTorchConvertModel):
                 0, 255, [16, 3, 224, 224]).to(torch.float32))
             inputs = processor(video, return_tensors="pt")
             example = dict(inputs)
+        elif 'text-to-speech' in mi.tags:
+            from transformers import AutoTokenizer
+            tokenizer = AutoTokenizer.from_pretrained(name)
+            text = "some example text in the English language"
+            inputs = tokenizer(text, return_tensors="pt")
+            example = dict(inputs)
+        elif 'musicgen' in mi.tags:
+            from transformers import AutoProcessor, AutoModelForTextToWaveform
+            processor = AutoProcessor.from_pretrained(name)
+            model = AutoModelForTextToWaveform.from_pretrained(name, torchscript=True)
+
+            inputs = processor(
+                text=["80s pop track with bassy drums and synth"],
+                padding=True,
+                return_tensors="pt",
+            )
+            example = dict(inputs)
+            # works for facebook/musicgen-small
+            pad_token_id = model.generation_config.pad_token_id
+            example["decoder_input_ids"] = torch.ones(
+                (inputs.input_ids.shape[0] * model.decoder.num_codebooks, 1), dtype=torch.long) * pad_token_id
         else:
             try:
                 if auto_model == "AutoModelForCausalLM":
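
Note on the pix2struct hunk: `generate()` is not a traceable `forward()`, so the patch hides it behind a thin `nn.Module`. A minimal standalone sketch of that pattern, assuming the same two-tensor input signature as in the diff (the class name `GenerateWrapper` is illustrative, not from the patch):

import torch

class GenerateWrapper(torch.nn.Module):
    """Exposes model.generate() through forward() so tracing-based
    converters see an ordinary module with tensor inputs and outputs."""

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, flattened_patches, attention_mask):
        # generate() runs the full autoregressive decoding loop internally
        return self.model.generate(flattened_patches=flattened_patches,
                                   attention_mask=attention_mask)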
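
Note on the musicgen hunk: the decoder consumes one token stream per audio codebook, so it is primed with one pad token per (batch, codebook) pair. A small sketch of the shape arithmetic; the concrete values (4 codebooks, pad id 2048) are assumptions for facebook/musicgen-small and are normally read from model.decoder.num_codebooks and model.generation_config.pad_token_id as the patch does:

import torch

batch_size = 1       # inputs.input_ids.shape[0] for a single text prompt
num_codebooks = 4    # assumed: model.decoder.num_codebooks for musicgen-small
pad_token_id = 2048  # assumed: model.generation_config.pad_token_id

# One column of pad tokens, one row per (batch, codebook) stream.
decoder_input_ids = torch.ones(
    (batch_size * num_codebooks, 1), dtype=torch.long) * pad_token_id
print(decoder_input_ids.shape)  # torch.Size([4, 1])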