mainz
clone your own copy | download snapshot

Snapshots | iceberg

Inside this repository

get_wikipedia_page copy.py
text/x-python

Download raw (878 bytes)

import re

import wikipediaapi

# Shared Wikipedia client: English-language pages, extracts returned as HTML.
# NOTE(review): recent Wikipedia-API releases appear to require a user-agent
# argument in this constructor — confirm against the installed version.
wiki_html = wikipediaapi.Wikipedia(language='en', extract_format=wikipediaapi.ExtractFormat.HTML)

# Matches one complete <link ...> tag; \b keeps other tag names that merely
# start with "link" intact.
_LINK_TAG_RE = re.compile(r"<link\b[^>]*>")


def _strip_link_tags(html):
    """Return *html* with every ``<link ...>`` tag removed, all other text kept."""
    return _LINK_TAG_RE.sub("", html)


def request_page_content(request):
    """Fetch a Wikipedia page and attach a cleaned copy of its HTML to it.

    Args:
        request: Page title handed to ``wiki_html.page``.

    Returns:
        The page object returned by ``wiki_html.page``, with two extra
        attributes set on it: ``request`` (the title exactly as given) and
        ``content`` (``page.text`` with its ``<link>`` tags removed).
    """
    page = wiki_html.page(request)

    # checking if the page exists
    print("Page - Exists: %s" % page.exists())

    # cleaning the html content we get from inline style:
    # only the <link> tags themselves are removed, so text that follows a
    # link tag, or that precedes the first tag, is preserved unchanged.
    cleaned_html = _strip_link_tags(page.text)

    # adding the original request
    # and the content as custom attribute
    page.request = request
    page.content = cleaned_html

    # all the attribute we have access to are here:
    # https://wikipedia-api.readthedocs.io/en/latest/API.html?highlight=WikipediaPage#wikipediapage

    return page