erikbern · December 7, 2022 22:11
diff --git a/batch_inference_hugginface.py b/batch_inference_hugginface.py
 import sys

 import modal

 # Modal stub for this example. Its container image is Debian slim with the
 # "datasets", "torch" and "transformers" packages pip-installed; the decorated
 # functions below are registered on this stub.
 stub = modal.Stub(
    image=modal.Image.debian_slim().pip_install(["datasets", "torch", "transformers"])
 )


 class Predictor:
    """Sentiment scorer backed by a Hugging Face DistilBERT SST-2 pipeline.

    The pipeline is created in ``__enter__`` and reused by ``predict``.
    NOTE(review): presumably Modal calls ``__enter__`` once per container
    before serving ``predict`` calls -- confirm against the Modal docs.
    """

    def __enter__(self):
        """Build the sentiment pipeline and keep it on the instance."""
        # Imported inside the method rather than at module level; presumably
        # because transformers is only installed in the container image --
        # TODO confirm.
        from transformers import pipeline

        model_name = "distilbert-base-uncased-finetuned-sst-2-english"
        self.sentiment_pipeline = pipeline(model=model_name)

    @stub.function(cpu=4)
    def predict(self, phrase: str):
        """Return ``(phrase, positive_score)`` for a single piece of text.

        Raises ``KeyError`` if the pipeline output has no "POSITIVE" label.
        """
        # With top_k=2 the output is expected to hold one entry per label, e.g.
        # [{'label': 'NEGATIVE', 'score': 0.99}, {'label': 'POSITIVE', 'score': 0.01}]
        scored_labels = self.sentiment_pipeline(
            phrase, truncation=True, max_length=512, top_k=2
        )
        score_by_label = {}
        for entry in scored_labels:
            score_by_label[entry["label"]] = entry["score"]
        return (phrase, score_by_label["POSITIVE"])


 @stub.function
 def get_data():
    """Load the "imdb" dataset and return the text of every test-split row."""
    # Imported inside the function rather than at module level; presumably
    # because datasets is only installed in the container image -- TODO confirm.
    from datasets import load_dataset

    test_split = load_dataset("imdb")["test"]
    texts = []
    for row in test_split:
        texts.append(row["text"])
    return texts


 if __name__ == "__main__":
    # Script entry point: inside a stub run, fetch the review texts, map the
    # predictor over them and print each positive score next to the first
    # 80 characters of its text.
    with stub.run():
        reviews = get_data()
        scored = Predictor().predict.map(reviews)
        for phrase, score in scored:
            print(f"{score:.4f} {phrase[:80]}")
	import sys

	import modal

	# Modal stub for this example. Its container image is Debian slim with the
	# "datasets", "torch" and "transformers" packages pip-installed.
	stub = modal.Stub(
	image=modal.Image.debian_slim().pip_install(["datasets", "torch", "transformers"])
	)


	class Predictor:
	    """Score text sentiment with a Hugging Face DistilBERT SST-2 pipeline.

	    ``__enter__`` creates the pipeline and ``predict`` uses it.
	    NOTE(review): presumably Modal invokes ``__enter__`` when a container
	    starts, before any ``predict`` call -- confirm against the Modal docs.
	    """

	    def __enter__(self):
	        """Create the sentiment pipeline and store it on the instance."""
	        from transformers import pipeline

	        self.sentiment_pipeline = pipeline(
	            model="distilbert-base-uncased-finetuned-sst-2-english"
	        )

	    @stub.function(cpu=4)
	    def predict(self, phrase: str):
	        """Return a ``(phrase, positive_score)`` tuple for ``phrase``.

	        Raises ``KeyError`` if the pipeline output has no "POSITIVE" label.
	        """
	        pred = self.sentiment_pipeline(phrase, truncation=True, max_length=512, top_k=2)
	        # pred will look like: [{'label': 'NEGATIVE', 'score': 0.99}, {'label': 'POSITIVE', 'score': 0.01}]
	        probs = {p["label"]: p["score"] for p in pred}
	        return (phrase, probs["POSITIVE"])


	@stub.function
	def get_data():
	    """Load the "imdb" dataset and return the text of each test-split row as a list."""
	    from datasets import load_dataset

	    imdb = load_dataset("imdb")
	    return [row["text"] for row in imdb["test"]]


	if __name__ == "__main__":
	    # Script entry point: inside a stub run, fetch the review texts, map
	    # Predictor.predict over them and print one "<score> <text prefix>"
	    # line per review (text truncated to 80 characters).
	    with stub.run():
	        data = get_data()
	        for phrase, score in Predictor().predict.map(data):
	            print(f"{score:.4f} {phrase[:80]}")