Source code for uccaapp.upload_streussel_passages
#!/usr/bin/env python3
import sys
import argparse
from ucca.convert import from_text, to_json
from uccaapp.api import ServerAccessor
# Description string passed to argparse.ArgumentParser(description=...) in the script entry point.
desc = """Upload a passage from a streussel format file"""
class StreusselPassageUploader(ServerAccessor):
    """Upload passages read from streussel-format files and submit a tokenization task for each.

    The source, project and user are selected once in the constructor through
    the ``set_source``/``set_project``/``set_user`` methods (not defined in this
    class; presumably inherited from ``ServerAccessor``).
    """

    def __init__(self, user_id, source_id, project_id, **kwargs):
        """Initialize the accessor and select the user, source and project.

        :param user_id: passed to ``self.set_user``
        :param source_id: passed to ``self.set_source``
        :param project_id: passed to ``self.set_project``
        :param kwargs: forwarded unchanged to ``ServerAccessor.__init__``
        """
        super().__init__(**kwargs)
        self.set_source(source_id)
        self.set_project(project_id)
        self.set_user(user_id)

    @staticmethod
    def _read_streussel_file(filename):
        """Read one passage file and return ``(passage_text, external_id)``.

        Blank lines are ignored. A line starting with ``#`` is expected to split
        into exactly four whitespace-separated fields with ``sent_id`` as the
        second one; the external ID is the second dash-separated component of
        the fourth field. Any other ``#`` line (including a sentence ID with no
        dash) is reported on stderr and otherwise ignored. All remaining lines
        are joined with single spaces to form the passage text.

        :param filename: path of the passage file (read as UTF-8)
        :return: tuple of the passage text and the external ID
                 (``"None given"`` if no valid ``sent_id`` line was found)
        """
        text_lines = []
        external_id = "None given"
        with open(filename, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if line.startswith("#"):
                    fields = line.split()
                    well_formed = len(fields) == 4 and fields[1] == "sent_id"
                    id_parts = fields[3].split("-") if well_formed else []
                    # A sentence ID without a dash has no second component to use,
                    # so report it like any other malformed header instead of
                    # raising IndexError.
                    if len(id_parts) < 2:
                        print("FORMAT ERROR in " + filename, file=sys.stderr)
                    else:
                        external_id = id_parts[1]
                else:
                    text_lines.append(line)
        return " ".join(text_lines), external_id

    @staticmethod
    def _log_key(filename):
        """Return the identifier written in the first log column for ``filename``.

        This is the second-to-last dot-separated component of the name; a name
        containing no dot is returned unchanged rather than raising IndexError.
        """
        parts = filename.split(".")
        return parts[-2] if len(parts) > 1 else filename

    def upload_streussel_passage_file(self, filenames, log=None, **kwargs):
        """Upload every passage file listed in ``filenames``.

        For each listed file, a passage is created, a tokenization task with
        status ``SUBMITTED`` is created for it, and the task is submitted with
        the tokenization produced by ``from_text``/``to_json``. A success message
        is printed to stderr per file.

        :param filenames: path of a text file (read as UTF-8) that lists one
                          passage file path per line; blank lines are skipped
        :param log: optional path of a log file; if given, one tab-separated line
                    per uploaded passage is written: log key, passage ID, task ID
        :param kwargs: ignored (accepted so the full CLI argument dict can be passed)
        """
        del kwargs
        log_h = open(log, "w", encoding="utf-8") if log else None
        try:
            with open(filenames, encoding="utf-8") as f_all:
                for filename in f_all:
                    filename = filename.strip()
                    if not filename:
                        # A blank line (e.g. a trailing newline) is not a file name.
                        continue
                    passage_text, external_id = self._read_streussel_file(filename)
                    passage_out = self.create_passage(text=passage_text, external_id=external_id, type="PUBLIC",
                                                      source=self.source)
                    task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                                   user=self.user, passage=passage_out,
                                   manager_comment="External ID: " + external_id,
                                   user_comment="", parent=None, is_demo=False, is_active=True)
                    tok_task_out = self.create_task(**task_in)
                    # Start from the created task and add the tokenization fields to it.
                    tok_user_task_in = dict(tok_task_out)
                    passage = list(from_text(passage_text.split(), tokenized=True))[0]
                    tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
                    self.submit_task(**tok_user_task_in)
                    print("Uploaded passage " + filename + " successfully.", file=sys.stderr)
                    if log_h is not None:
                        print(self._log_key(filename), passage_out["id"], tok_task_out["id"],
                              file=log_h, sep="\t")
        finally:
            # Close the log even when an upload fails, so lines already written are kept.
            if log_h is not None:
                log_h.close()

    @staticmethod
    def add_arguments(argparser):
        """Register this script's command-line arguments on ``argparser``.

        Adds the positional ``filenames`` argument and the ``-l``/``--log``
        option, then the project-ID, source-ID, user-ID and general arguments
        provided by ``ServerAccessor``.
        """
        argparser.add_argument("filenames", help="passage file names to convert and upload")
        argparser.add_argument("-l", "--log", help="filename to write log of uploaded passages to")
        ServerAccessor.add_project_id_argument(argparser)
        ServerAccessor.add_source_id_argument(argparser)
        ServerAccessor.add_user_id_argument(argparser)
        ServerAccessor.add_arguments(argparser)
def main(**kwargs):
    """Create a StreusselPassageUploader and run the upload.

    The complete keyword dictionary is handed both to the uploader's
    constructor and to ``upload_streussel_passage_file``.
    """
    uploader = StreusselPassageUploader(**kwargs)
    uploader.upload_streussel_passage_file(**kwargs)
if __name__ == "__main__":
    # Build the command-line parser, let the uploader register its arguments,
    # then forward the parsed values to main() as keyword arguments.
    cli = argparse.ArgumentParser(description=desc)
    StreusselPassageUploader.add_arguments(cli)
    options = vars(cli.parse_args())
    main(**options)
    sys.exit(0)