Source code for openhands.datasets.isolated.fingerspelling

import os
import pandas as pd
from .base import BaseIsolatedDataset
import yaml


class FingerSpellingDataset(BaseIsolatedDataset):
    """
    Fingerspelling datasets : 'Argentine', 'American', 'Chinese', 'Indian',
    'German', 'Greek', 'Turkish'

    On-disk layout read by the methods of this class::

        <root_dir>/<lang_code>/glosses.csv
        <root_dir>/<lang_code>/pkl_poses/<splits>/<gloss>/<sample files>

    ``glosses.csv`` is read without a header row and only its first column
    is used; each value doubles as the name of the per-gloss sample
    directory.
    """

    @staticmethod
    def _normalize_gloss(raw_gloss):
        """Return the canonical form of a gloss.

        The gloss is lower-cased and surrounding spaces, tabs and newlines
        are removed. Both the vocabulary and the label lookup go through
        this helper so they always agree on the key.
        """
        return raw_gloss.lower().strip(" \n\t")

    def _read_raw_glosses(self, lang_code):
        """Return the first column of ``<root_dir>/<lang_code>/glosses.csv``.

        Values are returned exactly as stored in the CSV (no normalisation),
        because they are also used verbatim as directory names.
        """
        csv_path = "/".join((self.root_dir, lang_code, "glosses.csv"))
        gloss_table = pd.read_csv(csv_path, header=None)
        return gloss_table[0].tolist()

    def read_glosses(self):
        """Build ``self.glosses`` from every language in ``self.language_set``.

        The result is the sorted list of unique normalised glosses across
        all of those languages. The list is also printed to stdout.
        """
        all_glosses = set()
        for lang_code in self.language_set:
            all_glosses.update(
                self._normalize_gloss(raw_gloss)
                for raw_gloss in self._read_raw_glosses(lang_code)
            )
        self.glosses = sorted(all_glosses)
        print(self.glosses)

    def read_original_dataset(self):
        """Append one ``(sample_path, gloss_id)`` tuple per sample to ``self.data``.

        For every language in ``self.languages`` and every gloss listed in
        its ``glosses.csv``, the files under
        ``pkl_poses/<self.splits>/<gloss>`` are enumerated. ``sample_path``
        is the file path with everything after the first ``.`` of the file
        name removed; ``gloss_id`` comes from ``self.gloss_to_id``, which is
        not populated in this class.

        Upstream note: the fingerspelling datasets were divided 80-20 into
        train/test splits. This method does not split anything itself; it
        only reads the split directory named by ``self.splits``.

        Raises:
            KeyError: if a normalised gloss is missing from
                ``self.gloss_to_id``.
            FileNotFoundError: if a gloss directory does not exist.
        """
        for lang_code in self.languages:
            for raw_gloss in self._read_raw_glosses(lang_code):
                # The directory name uses the gloss exactly as written in
                # the CSV; only the label lookup uses the normalised form.
                directory = "/".join(
                    (self.root_dir, lang_code, "pkl_poses", self.splits, raw_gloss)
                )
                gloss_key = self._normalize_gloss(raw_gloss)
                # os.listdir returns entries in arbitrary order; sorting
                # keeps the sample order reproducible across machines.
                for name in sorted(os.listdir(directory)):
                    # NOTE(review): keeps only the text before the first
                    # dot, so a name such as "a.b.pkl" becomes "a" --
                    # confirm sample file names never contain extra dots.
                    filename = name.split(".")[0]
                    gloss_cat = self.gloss_to_id[gloss_key]
                    self.data.append((directory + "/" + filename, gloss_cat))