用 OFA 进行 OCR 训练任务
import os
import tempfile

import chardet
import pandas as pd
from datasets import Dataset
from modelscope.metainfo import Trainers
from modelscope.msdatasets import MsDataset
from modelscope.trainers import build_trainer
from modelscope.utils.constant import DownloadMode
from PIL import Image
# Load the OCR label CSV and wrap it as a ModelScope dataset.
#
# Expected file format (header row followed by one row per sample):
#
#   image_id,text,image
#   000000000,硖,/mnt/workspace/images/000000000.jpg
#   000000001,饰,/mnt/workspace/images/000000001.jpg
#   000000002,晟,/mnt/workspace/images/000000002.jpg

# The file's encoding is not known up front, so sniff it from the raw bytes
# before handing the file to pandas.
with open('./ocr_labels_modelscope.csv', 'rb') as f:
    result = chardet.detect(f.read())

# NOTE(review): with default dtype inference pandas will likely parse the
# zero-padded `image_id` values (e.g. 000000001) as integers and drop the
# padding -- confirm whether downstream code needs them as strings.
data = pd.read_csv('./ocr_labels_modelscope.csv', encoding=result['encoding'])

# pandas DataFrame -> Hugging Face Dataset -> ModelScope MsDataset wrapper.
ds = Dataset.from_pandas(data)
ds = MsDataset(ds)

# Sanity check: print the first record to verify the columns loaded as expected.
print(next(iter(ds)))
赞0
踩0