import docx
def read_word_document(file_path) -> list:
    """Return the text of every paragraph in a Word document.

    Args:
        file_path: Location of the ``.docx`` file; passed unchanged to
            ``docx.Document``.

    Returns:
        A list with one string per entry of ``doc.paragraphs``, in the
        order that collection yields them.
    """
    doc = docx.Document(file_path)
    # Only ``doc.paragraphs`` is read here; nothing else in the document
    # object is consulted.
    return [para.text for para in doc.paragraphs]
def remove_extra_newlines(paragraphs) -> list:
    """Replace every newline inside each paragraph with a single space.

    Args:
        paragraphs: Iterable of paragraph strings.

    Returns:
        A new list of the same length and order, where each ``"\\n"`` in a
        paragraph has been replaced by ``" "``. The input is not modified.
    """
    # Each newline becomes exactly one space; consecutive newlines therefore
    # yield consecutive spaces (no collapsing is performed).
    return [text.replace("\n", " ") for text in paragraphs]
def save_modified_document(paragraphs, output_file_path) -> None:
    """Write the given paragraph strings to a new Word document.

    Args:
        paragraphs: Iterable of strings; each one is added to the document
            with ``add_paragraph``, in iteration order.
        output_file_path: Destination passed unchanged to ``doc.save``.
    """
    # A fresh, empty document is created; only the supplied text is written
    # to it.
    doc = docx.Document()
    for text in paragraphs:
        doc.add_paragraph(text)
    # Save once, after all paragraphs have been added.
    doc.save(output_file_path)
# Source and destination documents for the clean-up run.
input_file, output_file = "input.docx", "output.docx"

# Pipeline: read the paragraphs, replace embedded newlines, write the result.
paragraphs = read_word_document(input_file)
cleaned_paragraphs = remove_extra_newlines(paragraphs)
save_modified_document(cleaned_paragraphs, output_file)
# Reference blog post