|
| 1 | +import re |
| 2 | + |
def convert_code_blocks(html_content):
    """Escape angle brackets inside every ``<code>...</code>`` block.

    Each ``<`` between a ``<code>`` opening tag and the nearest following
    ``</code>`` closing tag is rewritten as ``&lt;`` and each ``>`` as
    ``&gt;``. Text outside those blocks, and the ``<code>`` / ``</code>``
    tags themselves, are returned unchanged.

    Only the literal, lowercase, attribute-free ``<code>`` tag is matched;
    an opening tag such as ``<code class="x">`` is left alone. Ampersands
    are not touched, so entities already present in a block are preserved
    and running the function twice gives the same result as running it once.

    Args:
        html_content: The HTML text to process.

    Returns:
        A new string with the angle brackets inside code blocks replaced by
        their HTML entities.
    """
    def replace_angle_brackets(match):
        code_content = match.group(1)
        # Substitute the HTML entities. Replacing each bracket with itself
        # would leave the block exactly as it was.
        code_content = code_content.replace('<', '&lt;').replace('>', '&gt;')
        return f"<code>{code_content}</code>"

    # Non-greedy so each block stops at its nearest </code>; re.DOTALL lets
    # "." span newlines so multi-line blocks are matched as well.
    pattern = r'<code>(.*?)</code>'
    return re.sub(pattern, replace_angle_brackets, html_content, flags=re.DOTALL)
| 19 | + |
def process_html_file(input_file, output_file):
    """Convert the code blocks of one HTML file and write the result.

    The input is read as UTF-8 text, passed through ``convert_code_blocks``,
    and the returned text is written to ``output_file`` as UTF-8. A
    confirmation line is printed on success. Any exception raised along the
    way is caught and reported with a printed message instead of being
    propagated to the caller.
    """
    try:
        # Load and transform the source document.
        with open(input_file, 'r', encoding='utf-8') as source:
            converted = convert_code_blocks(source.read())

        # Persist the transformed document.
        with open(output_file, 'w', encoding='utf-8') as target:
            target.write(converted)

        print(f"Successfully processed {input_file} and saved to {output_file}")
    except Exception as err:
        print(f"Error processing file: {err}")
| 40 | + |
if __name__ == "__main__":
    import sys

    # Both the input path and the output path must be supplied.
    if len(sys.argv) <= 2:
        print("Usage: python script.py input_file.html output_file.html")
        sys.exit(1)

    # Only the first two arguments are used; any extras are ignored.
    input_file, output_file = sys.argv[1:3]
    process_html_file(input_file, output_file)
0 commit comments