# whoogle-search/test/test_results.py

from bs4 import BeautifulSoup
from app.filter import Filter
from app.utils.session import generate_user_key
from datetime import datetime
from dateutil.parser import *
from urllib.parse import urlparse

from test.conftest import demo_config


def get_search_results(data):
    """Return the result divs found in a filtered search response.

    The response body is parsed with BeautifulSoup and passed through
    ``Filter.clean`` using a freshly generated user key. The element
    children of ``<div id="main">`` are then scanned for result containers.

    Args:
        data: Body (markup) of a search response.

    Returns:
        A list of the direct child elements of the main div whose only
        child is a single ``<div>``.

    Raises:
        AssertionError: If the main div is missing or has fewer than two
            children.
    """
    secret_key = generate_user_key()
    soup = Filter(user_key=secret_key).clean(
        BeautifulSoup(data, 'html.parser'))

    main_div = soup.find('div', {'id': 'main'})
    # find() returns None when nothing matches; fail with a readable
    # assertion rather than a TypeError from len(None)
    assert main_div is not None, 'no <div id="main"> in filtered response'
    assert len(main_div) > 1

    result_divs = []
    # Only element children are candidates: text nodes sitting between the
    # divs have no children of their own and can never be result containers
    for div in main_div.find_all(recursive=False):
        # Result divs should only have 1 inner div
        if len(list(div.children)) != 1:
            continue

        inner = div.find()
        if inner is None or inner.name != 'div':
            continue

        result_divs.append(div)

    return result_divs


def test_get_results(client):
    """A GET search returns 200 and a plausible number of result divs."""
    rv = client.get('/search?q=test')
    # Use the public status_code attribute rather than the private field
    assert rv.status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs. Parse and filter the response only once.
    num_results = len(get_search_results(rv.data))
    assert 10 <= num_results <= 15


def test_post_results(client):
    """A POST search returns 200 and a plausible number of result divs."""
    rv = client.post('/search', data=dict(q='test'))
    # Use the public status_code attribute rather than the private field
    assert rv.status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs. Parse and filter the response only once.
    num_results = len(get_search_results(rv.data))
    assert 10 <= num_results <= 15


def test_block_results(client):
    """Blocking a site in the config removes its links from the results.

    First checks that a search for 'pinterest' links to pinterest.com, then
    posts a config with ``block`` set to that domain and checks that no
    link in a repeated search points there.
    """
    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv.status_code == 200

    # Sanity check: without blocking, at least one link goes to the site
    links = BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True)
    assert any(
        'pinterest.com' in urlparse(link['href']).netloc for link in links)

    # Post a modified copy so the shared demo_config imported from conftest
    # is not mutated for every test that runs afterwards
    blocked_config = dict(demo_config, block='pinterest.com')
    rv = client.post('/config', data=blocked_config)
    assert rv.status_code == 302

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv.status_code == 200

    for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):
        assert 'pinterest.com' not in urlparse(link['href']).netloc


# TODO: Unit test the site alt method instead -- the results returned
# are too unreliable for this test in particular.
# def test_site_alts(client):
    # rv = client.post('/search', data=dict(q='twitter official account'))
    # assert b'twitter.com/Twitter' in rv.data

    # client.post('/config', data=dict(alts=True))
    # assert json.loads(client.get('/config').data)['alts']

    # rv = client.post('/search', data=dict(q='twitter official account'))
    # assert b'twitter.com/Twitter' not in rv.data
    # assert b'nitter.net/Twitter' in rv.data


def test_recent_results(client):
    """Time-restricted searches only return results within the period.

    For each ':past ...' suffix, every result whose first span parses as a
    date must be no older than the period's length in days, plus a small
    tolerance.
    """
    times = {
        'past year': 365,
        'past month': 31,
        'past week': 7
    }

    for period, num_days in times.items():
        rv = client.post('/search', data=dict(q='test :' + period))
        assert rv.status_code == 200
        result_divs = get_search_results(rv.data)

        current_date = datetime.now()
        for div in result_divs:
            span = div.find('span')
            if span is None:
                continue

            date_span = span.decode_contents()
            # Only spans of 7 to 15 characters are tried as dates
            if not 7 <= len(date_span) <= 15:
                continue

            # Keep the try body to the parse call alone, so only parse
            # failures are skipped
            try:
                date = parse(date_span)
            except ParserError:
                # Not every short span holds a date
                continue

            # Date can have a little bit of wiggle room
            assert (current_date - date).days <= (num_days + 5), (
                '{!r} is outside of "{}"'.format(date_span, period))