diff --git a/tests/cli_tests.py b/tests/cli_tests.py index 42f7e884..de2062ae 100644 --- a/tests/cli_tests.py +++ b/tests/cli_tests.py @@ -448,7 +448,7 @@ def test_crawling(): def test_probing(): "Test webpage probing functions." - url = 'https://httpbun.org/html' + url = 'https://example.org/' testargs = ['', '--probe', url, '--target-language', 'de'] with patch.object(sys, 'argv', testargs): args = cli.parse_args(testargs) diff --git a/trafilatura/cli_utils.py b/trafilatura/cli_utils.py index 876b1d91..0db1f3d8 100644 --- a/trafilatura/cli_utils.py +++ b/trafilatura/cli_utils.py @@ -42,45 +42,45 @@ STRIP_DIR = re.compile(r'[^/]+$') STRIP_EXTENSION = re.compile(r'\.[a-z]{2,5}$') +INPUT_URLS_ARGS = ['URL', 'crawl', 'explore', 'probe', 'feed', 'sitemap'] + +EXTENSION_MAPPING = { + 'csv': '.csv', + 'json': '.json', + 'xml': '.xml', + 'xmltei': '.xml', +} + def load_input_urls(args): '''Read list of URLs to process or derive one from command-line arguments''' + input_urls = [] + if args.input_file: - input_urls = [] try: # optional: errors='strict', buffering=1 with open(args.input_file, mode='r', encoding='utf-8') as inputfile: - for line in inputfile: - input_urls.append(line.strip()) + input_urls.extend(line.strip() for line in inputfile) except UnicodeDecodeError: sys.exit('ERROR: system, file type or buffer encoding') - elif args.URL: - input_urls = [args.URL] - elif args.crawl: - input_urls = [args.crawl] - elif args.explore: - input_urls = [args.explore] - elif args.probe: - input_urls = [args.probe] - elif args.feed: - input_urls = [args.feed] - elif args.sitemap: - input_urls = [args.sitemap] else: + for arg in INPUT_URLS_ARGS: + if getattr(args, arg): + input_urls = [getattr(args, arg)] + break + + if not input_urls: LOGGER.warning('No input provided') - input_urls = [] + # uniq URLs while preserving order (important) return uniquify_list(input_urls) def load_blacklist(filename): '''Read list of unwanted URLs''' - blacklist = set() - with open(filename, mode='r', encoding='utf-8') as inputfh: - for line in inputfh: - url = line.strip() - # if validate_url(url)[0] is True: - blacklist.add(URL_BLACKLIST_REGEX.sub('', url)) + with open(filename, 'r', encoding='utf-8') as inputfh: + # if validate_url(url)[0] is True: + blacklist = {URL_BLACKLIST_REGEX.sub('', line.strip()) for line in inputfh} return blacklist @@ -139,28 +139,23 @@ def get_writable_path(destdir, extension): def determine_output_path(args, orig_filename, content, counter=None, new_filename=None): '''Pick a directory based on selected options and a file name based on output type''' - # determine extension - extension = '.txt' - if args.output_format in ('xml', 'xmltei'): - extension = '.xml' - elif args.output_format == 'csv': - extension = '.csv' - elif args.output_format == 'json': - extension = '.json' - # determine directory - if args.keep_dirs is True: + # determine extension, TXT by default + extension = EXTENSION_MAPPING.get(args.output_format, '.txt') + + if args.keep_dirs: # strip directory - orig_directory = STRIP_DIR.sub('', orig_filename) - destination_directory = path.join(args.output_dir, orig_directory) + original_dir = STRIP_DIR.sub('', orig_filename) + destination_dir = path.join(args.output_dir, original_dir) # strip extension filename = STRIP_EXTENSION.sub('', orig_filename) - output_path = path.join(args.output_dir, filename + extension) else: - destination_directory = determine_counter_dir(args.output_dir, counter) + destination_dir = determine_counter_dir(args.output_dir, counter) # use cryptographic 
hash on file contents to define name filename = new_filename or generate_hash_filename(content) - output_path = path.join(destination_directory, filename + extension) - return output_path, destination_directory + + output_path = path.join(destination_dir, filename + extension) + return output_path, destination_dir + def archive_html(htmlstring, args, counter=None): @@ -182,9 +177,9 @@ def write_result(result, args, orig_filename=None, counter=None, new_filename=No if args.output_dir is None: sys.stdout.write(result + '\n') else: - destination_path, destination_directory = determine_output_path(args, orig_filename, result, counter, new_filename) + destination_path, destination_dir = determine_output_path(args, orig_filename, result, counter, new_filename) # check the directory status - if check_outputdir_status(destination_directory) is True: + if check_outputdir_status(destination_dir) is True: with open(destination_path, mode='w', encoding='utf-8') as outputfile: outputfile.write(result) @@ -268,8 +263,8 @@ def cli_discovery(args): def build_exploration_dict(url_store, input_urls, args): "Find domains for which nothing has been found and add info to the crawl dict." - input_domains = set(extract_domain(u) for u in input_urls) - known_domains = set(extract_domain(u) for u in url_store.get_known_domains()) + input_domains = {extract_domain(u) for u in input_urls} + known_domains = {extract_domain(u) for u in url_store.get_known_domains()} still_to_crawl = input_domains - known_domains new_input_urls = [u for u in input_urls if extract_domain(u) in still_to_crawl] control_dict = add_to_compressed_dict( diff --git a/trafilatura/core.py b/trafilatura/core.py index e89a6f81..d15164e7 100644 --- a/trafilatura/core.py +++ b/trafilatura/core.py @@ -827,8 +827,8 @@ def determine_returnstring(document, output_format, include_formatting, tei_vali else: returnstring = xmltotxt(document.body, include_formatting) if document.commentsbody is not None: - returnstring += '\n' + xmltotxt(document.commentsbody, include_formatting) - returnstring = returnstring.strip() + comments_text = xmltotxt(document.commentsbody, include_formatting) + returnstring = f"{returnstring}\n{comments_text}".strip() # normalize Unicode format (defaults to NFC) return normalize_unicode(returnstring) diff --git a/trafilatura/downloads.py b/trafilatura/downloads.py index aa6b229e..13c17d2e 100644 --- a/trafilatura/downloads.py +++ b/trafilatura/downloads.py @@ -206,28 +206,21 @@ def is_live_page(url): def add_to_compressed_dict(inputlist, blacklist=None, url_filter=None, url_store=None, compression=False, verbose=False): '''Filter, convert input URLs and add them to domain-aware processing dictionary''' - # init if url_store is None: url_store = UrlStore( compressed=compression, strict=False, verbose=verbose ) - # deduplicate while keeping order + inputlist = uniquify_list(inputlist) - # filter + if blacklist: inputlist = [u for u in inputlist if URL_BLACKLIST_REGEX.sub('', u) not in blacklist] + if url_filter: - filtered_list = [] - while inputlist: - u = inputlist.pop() - for f in url_filter: - if f in u: - filtered_list.append(u) - break - inputlist = filtered_list - # validate and store + inputlist = [u for u in inputlist if any(f in u for f in url_filter)] + url_store.add_urls(inputlist) return url_store diff --git a/trafilatura/filters.py b/trafilatura/filters.py index 5f532b23..ca1637b7 100644 --- a/trafilatura/filters.py +++ b/trafilatura/filters.py @@ -61,21 +61,15 @@ def check_html_lang(tree, target_language, 
strict=False): '''Check HTML meta-elements for language information and split the result in case there are several languages''' # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language - target_elements = tree.findall('.//meta[@http-equiv="content-language"][@content]') - if target_elements: - for elem in target_elements: - if target_language in RE_HTML_LANG.split(elem.get('content').lower()): - return True - LOGGER.debug('HTML content-language failed') - return False - # locale - target_elements = tree.findall('.//meta[@property="og:locale"][@content]') - if target_elements: - for elem in target_elements: - if target_language in RE_HTML_LANG.split(elem.get('content').lower()): - return True - LOGGER.debug('HTML og:locale failed') - return False + target_attrs = ['http-equiv="content-language"', 'property="og:locale"'] + for attr in target_attrs: + target_elements = tree.findall(f'.//meta[@{attr}][@content]') + if target_elements: + for elem in target_elements: + if target_language in RE_HTML_LANG.split(elem.get('content', '').lower()): + return True + LOGGER.debug('%s failed', attr) + return False # HTML lang attribute: sometimes a wrong indication if strict is True: target_elements = tree.xpath('//html[@lang]') @@ -122,19 +116,13 @@ def language_filter(temp_text, temp_comments, target_language, docmeta): def textfilter(element): '''Filter out unwanted text''' - # print('#', element.text) - if element.text is None and element.tail is not None: - testtext = element.tail - else: - testtext = element.text - if text_chars_test(testtext) is False: - return True + testtext = element.tail if element.text is None else element.text # to check: line len → continue if len(line) <= 5 - return any(RE_FILTER.match(line) for line in testtext.splitlines()) + return not text_chars_test(testtext) or any(map(RE_FILTER.match, testtext.splitlines())) def text_chars_test(string): '''Determine if a string is only composed of spaces and/or control characters''' # or not re.search(r'\w', string) # return string is not None and len(string) != 0 and not string.isspace() - return string not in (None, '') and not string.isspace() + return bool(string) and not string.isspace() diff --git a/trafilatura/htmlprocessing.py b/trafilatura/htmlprocessing.py index 809c4ccc..47c17596 100644 --- a/trafilatura/htmlprocessing.py +++ b/trafilatura/htmlprocessing.py @@ -43,6 +43,21 @@ ) +REND_TAG_MAPPING = { + 'em': '#i', + 'i': '#i', + 'b': '#b', + 'strong': '#b', + 'u': '#u', + 'kbd': '#t', + 'samp': '#t', + 'tt': '#t', + 'var': '#t', + 'sub': '#sub', + 'sup': '#sup' +} + + def tree_cleaning(tree, options): '''Prune the tree by discarding unwanted elements''' # determine cleaning strategy, use lists to keep it deterministic @@ -117,19 +132,16 @@ def collect_link_info(links_xpath, favor_precision=False): # init shortelems, mylist = 0, [] # longer strings impact recall in favor of precision - if favor_precision is False: - threshold = 10 - else: - threshold = 50 + threshold = 10 if not favor_precision else 50 # examine the elements for subelem in links_xpath: subelemtext = trim(subelem.text_content()) - if not subelemtext: - continue - mylist.append(subelemtext) - lengths = [len(text) for text in mylist] - shortelems = len([l for l in lengths if l < threshold]) - return sum(lengths), len(mylist), shortelems, mylist + if subelemtext: + mylist.append(subelemtext) + if len(subelemtext) < threshold: + shortelems += 1 + lengths = sum(len(text) for text in mylist) + return lengths, len(mylist), shortelems, mylist def 
link_density_test(element, text, favor_precision=False): @@ -222,10 +234,9 @@ def convert_tags(tree, options, url=None): '''Simplify markup and convert relevant HTML tags to an XML standard''' # delete links for faster processing if options.links is False: + xpath_expr = './/div//a|.//ul//a' # .//p//a ? if options.tables is True: - xpath_expr = './/div//a|.//table//a|.//ul//a' # .//p//a ? - else: - xpath_expr = './/div//a|.//ul//a' # .//p//a ? + xpath_expr += '|.//table//a' # necessary for further detection for elem in tree.xpath(xpath_expr): elem.tag = 'ref' @@ -246,32 +257,12 @@ def convert_tags(tree, options, url=None): elem.set('target', target) # include_formatting if options.formatting is False: - strip_tags(tree, 'em', 'i', 'b', 'strong', 'u', 'kbd', 'samp', 'tt', 'var', 'sub', 'sup') + strip_tags(tree, *REND_TAG_MAPPING) else: - for elem in tree.iter('em', 'i', 'b', 'strong', 'u', 'kbd', 'samp', 'tt', 'var', 'sub', 'sup'): - # italics - if elem.tag in ('em', 'i'): - elem.tag = 'hi' - elem.set('rend', '#i') - # bold font - elif elem.tag in ('b', 'strong'): - elem.tag = 'hi' - elem.set('rend', '#b') - # u (very rare) - elif elem.tag == 'u': - elem.tag = 'hi' - elem.set('rend', '#u') - # tt (very rare) - elif elem.tag in ('kbd', 'samp', 'tt', 'var'): - elem.tag = 'hi' - elem.set('rend', '#t') - # sub and sup (very rare) - elif elem.tag == 'sub': - elem.tag = 'hi' - elem.set('rend', '#sub') - elif elem.tag == 'sup': - elem.tag = 'hi' - elem.set('rend', '#sup') + for elem in tree.iter(list(REND_TAG_MAPPING)): + attribute = REND_TAG_MAPPING[elem.tag] + elem.tag = 'hi' + elem.set('rend', attribute) # iterate over all concerned elements for elem in tree.iter('blockquote', 'br', 'del', 'details', 'dl', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'pre', 'q', 's', 'strike', 'ul'): # ul/ol → list / li → item @@ -282,7 +273,7 @@ def convert_tags(tree, options, url=None): for subelem in elem.iter('dd', 'dt', 'li'): # keep track of dd/dt items if subelem.tag in ('dd', 'dt'): - subelem.set('rend', subelem.tag + '-' + str(i)) + subelem.set('rend', f"{subelem.tag}-{i}") # increment counter after
for element in xmldoc.findall(".//text/body//div/lb"): - if element.tail is not None and element.tail.strip(): + if element.tail and element.tail.strip(): element.tag = 'p' element.text = element.tail element.tail = None # look for elements that are not valid for element in xmldoc.findall('.//text/body//*'): - if element.tag in {"ab", "p"} and element.tail and element.tail.strip(): + if element.tag in TEI_REMOVE_TAIL and element.tail and element.tail.strip(): _handle_unwanted_tails(element) # check elements if element.tag not in TEI_VALID_TAGS: @@ -210,7 +202,6 @@ def validate_tei(xmldoc): # , filename="" def replace_element_text(element, include_formatting): '''Determine element text based on text and tail''' - full_text = '' # handle formatting: convert to markdown if include_formatting is True and element.text is not None: if element.tag in ('del', 'head'): @@ -219,53 +210,45 @@ def replace_element_text(element, include_formatting): number = int(element.get('rend')[1]) except (TypeError, ValueError): number = 2 - element.text = ''.join(['#'*number, ' ', element.text]) + element.text = f'{"#" * number} {element.text}' elif element.tag == 'del': - element.text = ''.join(['~~', element.text, '~~']) + element.text = f'~~{element.text}~~' elif element.tag == 'hi': - if element.get('rend') == '#b': - element.text = ''.join(['**', element.text, '**']) - elif element.get('rend') == '#i': - element.text = ''.join(['*', element.text, '*']) - elif element.get('rend') == '#u': - element.text = ''.join(['__', element.text, '__']) - elif element.get('rend') == '#t': - element.text = ''.join(['`', element.text, '`']) + rend = element.get('rend') + if rend in HI_FORMATTING: + element.text = f'{HI_FORMATTING[rend]}{element.text}{HI_FORMATTING[rend]}' # handle links if element.tag == 'ref': if element.text is not None: + link_text = f'[{element.text}]' if element.get('target') is not None: - element.text = ''.join(['[', element.text, ']', '(', element.get('target'), ')']) + element.text = f"{link_text}({element.get('target')})" else: LOGGER.warning('missing link attribute: %s %s', element.text, element.attrib) - element.text = ''.join(['[', element.text, ']']) + element.text = link_text else: LOGGER.warning('empty link: %s %s', element.text, element.attrib) # handle text - if element.text is not None and element.tail is not None: - full_text = ''.join([element.text, element.tail]) - elif element.text is not None: - full_text = element.text - elif element.tail is not None: - full_text = element.tail - return full_text + return (element.text or '') + (element.tail or '') def merge_with_parent(element, include_formatting=False): '''Merge element with its parent and convert formatting to markdown.''' parent = element.getparent() - if parent is None: + if not parent: return + full_text = replace_element_text(element, include_formatting) + previous = element.getprevious() if previous is not None: # There is a previous node, append text to its tail if previous.tail is not None: - previous.tail = ' '.join([previous.tail, full_text]) + previous.tail = f'{previous.tail} {full_text}' else: previous.tail = full_text elif parent.text is not None: - parent.text = ' '.join([parent.text, full_text]) + parent.text = f'{parent.text} {full_text}' else: parent.text = full_text parent.remove(element) @@ -280,10 +263,8 @@ def xmltotxt(xmloutput, include_formatting): if element.text is None and element.tail is None: if element.tag == 'graphic': # add source, default to '' - text = element.get('title', '') - if 
element.get('alt') is not None: - text += ' ' + element.get('alt') - returnlist.extend(['![', text, ']', '(', element.get('src', ''), ')']) + text = f'{element.get("title", "")} {element.get("alt", "")}' + returnlist.extend(['![', text.strip(), ']', '(', element.get('src', ''), ')']) # newlines for textless elements if element.tag in ('graphic', 'row', 'table'): returnlist.append('\n') @@ -292,12 +273,7 @@ def xmltotxt(xmloutput, include_formatting): textelement = replace_element_text(element, include_formatting) # common elements if element.tag in NEWLINE_ELEMS: - returnlist.extend(['\n', textelement, '\n']) - # particular cases - elif element.tag == 'item': - returnlist.extend(['\n- ', textelement, '\n']) - elif element.tag == 'cell': - returnlist.extend(['|', textelement, '|']) + returnlist.extend([NEWLINE_ELEMS[element.tag], textelement, '\n']) elif element.tag == 'comments': returnlist.append('\n\n') else: @@ -330,7 +306,7 @@ def write_teitree(docmeta): def _define_publisher_string(docmeta): '''Construct a publisher string to include in TEI header''' if docmeta.hostname and docmeta.sitename: - publisherstring = docmeta.sitename.strip() + ' (' + docmeta.hostname + ')' + publisherstring = f'{docmeta.sitename.strip()} ({docmeta.hostname})' elif docmeta.hostname: publisherstring = docmeta.hostname elif docmeta.sitename: @@ -425,19 +401,20 @@ def write_fullheader(teidoc, docmeta): def _handle_text_content_of_div_nodes(element): - if element.text is not None and element.text.strip(): + if element.text and element.text.strip(): if element.getchildren() and element[0].tag == 'p': p_text = element[0].text or "" - element[0].text = ' '.join([element.text, p_text]).strip() + element[0].text = f'{element.text} {p_text}'.strip() else: new_child = Element("p") new_child.text = element.text element.insert(0, new_child) element.text = None - if element.tail is not None and element.tail.strip(): + + if element.tail and element.tail.strip(): if element.getchildren() and element[-1].tag == 'p': p_text = element[-1].text or "" - element[-1].text = ' '.join([p_text, element.tail]).strip() + element[-1].text = f'{p_text} {element.tail}'.strip() else: new_child = Element("p") new_child.text = element.tail @@ -505,9 +482,10 @@ def _wrap_unwanted_siblings_of_div(div_element): def _move_element_one_level_up(element): parent = element.getparent() new_elem = Element("p") - for sibling in element.itersiblings(): - new_elem.append(sibling) + new_elem.extend(sibling for sibling in element.itersiblings()) + parent.addnext(element) + if element.tail is not None and element.tail.strip(): new_elem.text = element.tail.strip() element.tail = None
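
For reference, a minimal standalone sketch of the behaviour the REND_TAG_MAPPING refactor in convert_tags() is going for: one dictionary lookup replaces the former if/elif cascade over formatting tags, and the same dict feeds strip_tags(*REND_TAG_MAPPING) when formatting is disabled. This is illustrative only, not part of the patch; it uses a bare lxml tree and a made-up sample paragraph instead of the real options object, and it relies on lxml's iter() accepting a sequence of tag names, as the patched code does.

from lxml import etree

# same mapping as introduced in trafilatura/htmlprocessing.py
REND_TAG_MAPPING = {
    'em': '#i', 'i': '#i',
    'b': '#b', 'strong': '#b',
    'u': '#u',
    'kbd': '#t', 'samp': '#t', 'tt': '#t', 'var': '#t',
    'sub': '#sub', 'sup': '#sup',
}

# hypothetical input, stands in for the cleaned HTML tree
tree = etree.fromstring('<p>A <b>bold</b> word and an <em>italic</em> one.</p>')

# data-driven conversion: set the rend attribute from the mapping, then rename to hi
for elem in tree.iter(list(REND_TAG_MAPPING)):
    elem.set('rend', REND_TAG_MAPPING[elem.tag])
    elem.tag = 'hi'

print(etree.tostring(tree).decode())
# <p>A <hi rend="#b">bold</hi> word and an <hi rend="#i">italic</hi> one.</p>

Keeping the tag-to-rend relation in data rather than control flow means adding or dropping a formatting tag touches one dict entry instead of two code paths (the strip_tags call and the conversion loop).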