surf-adblock

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 888e9c8a2237bf9c374e04737f103e91cdc30267
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Thu, 14 Jul 2016 22:18:31 +0200

initial insertion (twss)

Diffstat:
LICENSE | 16++++++++++++++++
Makefile | 35+++++++++++++++++++++++++++++++++++
README | 1+
TODO | 11+++++++++++
config.mk | 32++++++++++++++++++++++++++++++++
dl.sh | 20++++++++++++++++++++
surf-adblock.c | 781+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 896 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,16 @@ +ISC License + +Copyright (c) 2016 Hiltjo Posthuma <hiltjo@codemadness.org> +Copyright (c) 2016 Quentin Rameau <quinq@fifth.space> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile @@ -0,0 +1,35 @@ +include config.mk + +SRC = surf-adblock.c +OBJ = ${SRC:.c=.lo} + +all: surf-adblock.la + +.SUFFIXES: .la .lo .o .c + +.c.o: + ${CC} -c ${CFLAGS} $< + +.c.lo: + ${LIBTOOL} --mode compile --tag CC ${CC} ${LIBCFLAGS} -c $< + +${OBJ}: config.mk + +surf-adblock.la: ${OBJ} + ${LIBTOOL} --mode link --tag CC ${CC} ${LIB} ${LIBLDFLAGS} -o $@ \ + ${OBJ} -rpath ${DESTDIR}${LIBPREFIX} + +clean: + rm -rf surf-adblock.la .libs ${OBJ} ${OBJ:.lo=.o} + +install: all surf-adblock.la + mkdir -p ${DESTDIR}${LIBPREFIX} + ${LIBTOOL} --mode install install -c surf-adblock.la \ + ${DESTDIR}${LIBPREFIX}/surf-adblock.la + +uninstall: + ${LIBTOOL} --mode uninstall rm -f \ + ${DESTDIR}${LIBPREFIX}/surf-adblock.la + rm -df ${DESTDIR}${LIBPREFIX} || true + +.PHONY: all clean install uninstall diff --git a/README b/README @@ -0,0 +1 @@ +adblock parser (WIP) diff --git a/TODO b/TODO @@ -0,0 +1,11 @@ +Docs: +- https://adblockplus.org/en/filter-cheatsheet +- https://adblockplus.org/filters + +- separate between site-specific and global block rules. +- optimize matching. +- optimize memory allocation. +- optimize: pregenerate one global stylesheet that applies to all sites? +? support exception rules #@# + + diff --git a/config.mk b/config.mk @@ -0,0 +1,32 @@ +VERSION = 0.1 + +# Customize below to fit your system + +# paths +PREFIX = /usr/local +MANPREFIX = ${PREFIX}/share/man +LIBPREFIX = ${PREFIX}/lib/surf + +X11INC = /usr/X11R6/include +X11LIB = /usr/X11R6/lib + +GTKINC = `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0` +GTKLIB = `pkg-config --libs gtk+-3.0 webkit2gtk-4.0` +WEBEXTINC = `pkg-config --cflags webkit2gtk-4.0 webkit2gtk-web-extension-4.0` +WEBEXTLIB = `pkg-config --libs webkit2gtk-4.0 webkit2gtk-web-extension-4.0` + +# includes and libs +INCS = -I. -I/usr/include -I${X11INC} ${GTKINC} +LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 ${GTKLIB} -lgthread-2.0 + +# flags +CPPFLAGS = -DVERSION=\"${VERSION}\" -DWEBEXTDIR=\"${LIBPREFIX}\" -D_DEFAULT_SOURCE +CFLAGS = -std=c99 -pedantic -Wall -Os ${INCS} ${CPPFLAGS} +LDFLAGS = -s ${LIBS} +LIBCPPFLAGS = -DWEBEXTDIR=\"${LIBPREFIX}\" -DWEBKIT_DOM_USE_UNSTABLE_API +LIBCFLAGS = -std=c99 -pedantic -Wall -Os ${WEBEXTINC} ${LIBCPPFLAGS} +LIBLDFLAGS = -s ${WEBEXTLIB} -module -avoid-version -no-undefined + +# compiler and linker +CC = cc +LIBTOOL = libtool --quiet diff --git a/dl.sh b/dl.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# download adblock lists + +set -e + +(while read -r l; do + curl -L "$l" + echo "" # not all lists have line at EOF. +done <<! +https://easylist.github.io/easylist/easylist.txt +https://easylist.github.io/easylist/easyprivacy.txt +https://easylist-downloads.adblockplus.org/antiadblockfilters.txt +https://easylist-downloads.adblockplus.org/easylistdutch.txt +https://easylist.github.io/easylistgermany/easylistgermany.txt +https://easylist-downloads.adblockplus.org/liste_fr.txt +https://easylist.github.io/easylist/fanboy-annoyance.txt +https://easylist.github.io/easylist/fanboy-social.txt +! +) | awk '{if(!x[$0]++){print $0;}}' > list +# remove duplicate lines but keep the order. diff --git a/surf-adblock.c b/surf-adblock.c @@ -0,0 +1,781 @@ +#include <sys/stat.h> +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> + +#include <webkit2/webkit-web-extension.h> +#include <webkitdom/webkitdom.h> +#include <webkitdom/WebKitDOMDOMWindowUnstable.h> + +typedef struct Page { + guint64 id; + WebKitWebPage *webpage; + WebKitDOMDOMWindow *view; + struct Page *next; +} Page; + +struct filterdomain { + char *domain; + int inverse; + struct filterdomain *next; +}; + +struct filterrule { + char *line; /* DEBUG */ + /* type: match mask, must be atleast 32-bit, see FilterType enum */ + unsigned long block; + int matchbegin; + int matchend; + /* is exception rule: prefix @@ for ABP or #@# for CSS */ + int isexception; + char *css; /* if non-NULL is CSS rule / hide element rule */ + char *uri; + struct filterdomain *domains; + struct filterrule *next; +}; + +enum { + FilterTypeScript = 1 << 0, + FilterTypeImage = 1 << 1, + FilterTypeCSS = 1 << 2, + FilterTypeObject = 1 << 3, + FilterTypeXHR = 1 << 4, + FilterTypeObjectSub = 1 << 5, + FilterTypeSubDoc = 1 << 6, + FilterTypePing = 1 << 7, + FilterTypeDocument = 1 << 8, + FilterTypeElemHide = 1 << 9, + FilterTypeOther = 1 << 10, + FilterTypeGenericHide = 1 << 11, + FilterTypeGenericBlock = 1 << 12, + FilterTypeMatchCase = 1 << 13, +}; + +struct filtertype { + /* `type` must be atleast 32-bit, see FilterType enum */ + unsigned long type; + char *name; + size_t namelen; + int allowinverse; + int allownormal; + int onlyexception; + void (*fn)(struct filterrule *, char *); +}; + +static void parsedomainsoption(struct filterrule *, char *); + +#define STRP(s) s,sizeof(s)-1 + +struct filtertype filtertypes[] = { + /* NOTE: options with 'type' = 0 are silently ignored and treated as + * requests for now */ + { 0, STRP("collapse"), 1, 1, 0, NULL }, + { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL }, + { 0, STRP("domain"), 0, 1, 0, &parsedomainsoption }, /* domain=... */ + { 0, STRP("donottrack"), 1, 1, 0, NULL }, + { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL }, + { 0, STRP("font"), 1, 1, 0, NULL }, + { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL }, + { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL }, + { FilterTypeImage, STRP("image"), 1, 1, 0, NULL }, + { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL }, + { 0, STRP("media"), 1, 1, 0, NULL }, + { FilterTypeObject, STRP("object"), 1, 1, 0, NULL }, + { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL }, + { FilterTypeOther, STRP("other"), 1, 1, 0, NULL }, + { FilterTypePing, STRP("ping"), 1, 1, 0, NULL }, + { 0, STRP("popup"), 1, 1, 0, NULL }, + { FilterTypeScript, STRP("script"), 1, 1, 0, NULL }, + { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL }, + { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL }, + { 0, STRP("third-party"), 1, 1, 0, NULL }, + { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL }, + /* NOTE: site-key not supported */ +}; + +static Page *pages; +static char globalcss[5000000]; /* TEST: dynamic allocate later */ +static struct filterrule *rules; + +void * +ecalloc(size_t nmemb, size_t size) +{ + void *p; + + if (!(p = calloc(nmemb, size))) { + fprintf(stderr, "calloc: %s\n", strerror(errno)); + exit(1); + } + + return p; +} + +char * +estrdup(const char *s) +{ + char *p; + + if (!(p = strdup(s))) { + fprintf(stderr, "strdup: %s\n", strerror(errno)); + exit(1); + } + + return p; +} + +#define END 0 +#define UNMATCHABLE -2 +#define BRACKET -3 +#define CARET -4 +#define STAR -5 + +static int +str_next(const char *str, size_t n, size_t *step) +{ + if (!n) { + *step = 0; + return 0; + } + if (str[0] >= 128U) { + wchar_t wc; + int k = mbtowc(&wc, str, n); + if (k<0) { + *step = 1; + return -1; + } + *step = k; + return wc; + } + *step = 1; + + return str[0]; +} + +static int +pat_next(const char *pat, size_t m, size_t *step) +{ + int esc = 0; + + if (!m || !*pat) { + *step = 0; + return END; + } + *step = 1; + if (pat[0]=='\\' && pat[1]) { + *step = 2; + pat++; + esc = 1; + goto escaped; + } + if (pat[0]=='^') + return CARET; + if (pat[0] == '*') + return STAR; +escaped: + if (pat[0] >= 128U) { + wchar_t wc; + int k = mbtowc(&wc, pat, m); + if (k<0) { + *step = 0; + return UNMATCHABLE; + } + *step = k + esc; + return wc; + } + return pat[0]; +} + +static int +casefold(int k) +{ + int c = towupper(k); + return c == k ? towlower(k) : c; +} + +/* match() based on musl-libc fnmatch: + https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */ +int +match(const char *pat, const char *str, int fcase) +{ + size_t m = -1, n = -1; + const char *p, *ptail, *endpat; + const char *s, *stail, *endstr; + size_t pinc, sinc, tailcnt=0; + int c, k, kfold; + + for (;;) { + switch ((c = pat_next(pat, m, &pinc))) { + case UNMATCHABLE: + return 1; + case STAR: + pat++; + m--; + break; + default: + k = str_next(str, n, &sinc); + if (k <= 0) + return (c==END) ? 0 : 1; + if (c == CARET && (iswdigit(k) || iswalpha(k) || strchr("_-.%", k))) + return 1; + str += sinc; + n -= sinc; + kfold = fcase ? casefold(k) : k; + if (k != c && kfold != c) + return 1; + pat+=pinc; + m-=pinc; + continue; + } + break; + } + + /* Compute real pat length if it was initially unknown/-1 */ + m = strnlen(pat, m); + endpat = pat + m; + + /* Find the last * in pat and count chars needed after it */ + for (p=ptail=pat; p<endpat; p+=pinc) { + switch (pat_next(p, endpat-p, &pinc)) { + case UNMATCHABLE: + return 1; + case STAR: + tailcnt=0; + ptail = p+1; + break; + default: + tailcnt++; + break; + } + } + + /* Past this point we need not check for UNMATCHABLE in pat, + * because all of pat has already been parsed once. */ + + /* Compute real str length if it was initially unknown/-1 */ + n = strnlen(str, n); + endstr = str + n; + if (n < tailcnt) return 1; + + /* Find the final tailcnt chars of str, accounting for UTF-8. + * On illegal sequences we may get it wrong, but in that case + * we necessarily have a matching failure anyway. */ + for (s=endstr; s>str && tailcnt; tailcnt--) { + if (s[-1] < 128U || MB_CUR_MAX==1) s--; + else while ((unsigned char)*--s-0x80U<0x40 && s>str); + } + if (tailcnt) return 1; + stail = s; + + /* Check that the pat and str tails match */ + p = ptail; + for (;;) { + c = pat_next(p, endpat-p, &pinc); + p += pinc; + if ((k = str_next(s, endstr-s, &sinc)) <= 0) { + if (c != END) return 1; + break; + } + s += sinc; + kfold = fcase ? casefold(k) : k; + if (k != c && kfold != c) + return 1; + } + + /* We're all done with the tails now, so throw them out */ + endstr = stail; + endpat = ptail; + + /* Match pattern components until there are none left */ + while (pat<endpat) { + p = pat; + s = str; + for (;;) { + c = pat_next(p, endpat-p, &pinc); + p += pinc; + /* Encountering * completes/commits a component */ + if (c == STAR) { + pat = p; + str = s; + break; + } + k = str_next(s, endstr-s, &sinc); + if (!k) + return 1; + kfold = fcase ? casefold(k) : k; + if (k != c && kfold != c) + break; + s += sinc; + } + if (c == STAR) continue; + /* If we failed, advance str, by 1 char if it's a valid + * char, or past all invalid bytes otherwise. */ + k = str_next(str, endstr-str, &sinc); + if (k > 0) str += sinc; + else for (str++; str_next(str, endstr-str, &sinc)<0; str++); + } + + return 0; +} + + +/* +domain=... if domain is prefixed with ~, ignore. +multiple domains can be separated with | +*/ +struct filterdomain * +parsedomains(char *s, int sep) +{ + struct filterdomain *head = NULL, *d, *last = NULL; + char *p; + int inverse; + + do { + inverse = 0; + if (*s == '~') { + inverse = !inverse; + s++; + } + if (!*s || *s == sep) + break; + + if ((p = strchr(s, sep))) /* TODO: should not contain , */ + *p = '\0'; + + d = ecalloc(1, sizeof(struct filterdomain)); + d->inverse = inverse; + d->domain = estrdup(s); + + if (!head) + head = last = d; + else + last = last->next = d; + + if (p) { + *p = sep; + s = p + 1; + } + } while (p); + + return head; +} + +void +parsedomainselement(struct filterrule *f, char *s) +{ + struct filterdomain *d, *last; + + for (last = f->domains; last && last->next; last = last->next) + ; + + d = parsedomains(s, ','); + if (last) + last->next = d; + else + f->domains = d; +} + +void +parsedomainsoption(struct filterrule *f, char *s) +{ + struct filterdomain *d, *last; + + for (last = f->domains; last && last->next; last = last->next) + ; + + d = parsedomains(s, '|'); + if (last) + last->next = d; + else + f->domains = d; +} + +int +filtertype_cmp(const void *a, const void *b) +{ + return strcmp(((struct filtertype *)a)->name, ((struct filtertype *)b)->name); +} + +/* check if domain is the same domain or a subdomain of `s` */ +int +matchdomain(const char *s, const char *domain) +{ + size_t l1, l2; + + l1 = strlen(s); + l2 = strlen(domain); + + /* subdomain-specific (longer) or other domain */ + if (l1 > l2) + return 0; + /* subdomain */ + if (l2 > l1 && domain[l2 - l1 - 1] == '.') + return !strcmp(&domain[l2 - l1], s); + + return !strcmp(s, domain); +} + +int +matchrule(struct filterrule *f, const char *uri, const char *type, const char *domain) +{ + /* NOTE: order matters, see FilterType enum values */ + const char *blockstr = "sicoxOSpde^"; + struct filterdomain *d; + char pat[1024]; + int r, i; + + /* ignore exception rules for now, these are usually paid + * for by sites to allow advertisements. */ + if (f->isexception) + return 0; + + if (f->css) { + r = f->domains ? 0 : 1; + for (d = f->domains; d; d = d->next) { + if (matchdomain(d->domain, domain)) { + if (r && d->inverse) + r = 0; + else if (!r && !d->inverse) + r = 1; + } else if (r && !d->inverse) { + r = 0; + } + } + return r; + } + + r = snprintf(pat, sizeof(pat), "%s%s%s", + f->matchbegin ? "" : "*", + f->uri, + f->matchend ? "" : "*"); + if (r == -1 || (size_t)r >= sizeof(pat)) { + fprintf(stderr, "warning: pattern too large, ignoring\n"); + return 0; + } + + r = f->domains ? 0 : 1; + for (d = f->domains; d; d = d->next) { + if (matchdomain(d->domain, domain)) { + if (r && d->inverse) + r = 0; + else if (!r && !d->inverse) + r = 1; + } else if (r && !d->inverse) { + r = 0; + } + } + + if (r && !match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) { + for (; *type; type++) { + for (i = 0; blockstr[i]; i++) { + if (blockstr[i] == *type && + f->block & (1 << i)) { + printf("block type '%c'\n", blockstr[i]); + } + } + } + return 1; + } + return 0; +} + +int +parserule(struct filterrule *f, char *s) +{ + struct filtertype key, *ft; + int inverse = 0; + char *p, *values; + + if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']')) + return 0; /* skip comment or empty line */ + for (; *s && isspace(*s); s++) + ; + if (!*s) + return 0; /* line had only whitespace: skip */ + + memset(f, 0, sizeof(struct filterrule)); + + if ((p = strstr(s, "#@#"))) { + *p = '\0'; + parsedomainselement(f, s); + *p = '#'; + f->css = estrdup(p + 3); + f->isexception = 1; + goto end; /* end of CSS rule */ + } + + /* element hiding rule, NOTE: no wildcards are supported, + "Simplified element hiding syntax" is not supported. */ + if ((p = strstr(s, "##"))) { + *p = '\0'; + parsedomainselement(f, s); + *p = '#'; + f->css = estrdup(p + 2); + goto end; /* end of rule */ + } + + if (!strncmp(s, "@@", 2)) { + f->isexception = 1; + s += 2; + } + if (*s == '|') { + s++; + if (*s == '|') { + f->matchbegin = 1; + s++; + } else { + f->matchend = 1; + } + } + + /* no options, use rest of line as uri. */ + if (!(p = strrchr(s, '$'))) { + f->uri = estrdup(s); + goto end; + } + + /* has options */ + *p = '\0'; + f->uri = estrdup(s); + *p = '$'; + s = ++p; + + /* blockmask, has options? default: allow all options, case-sensitive + * has no options? default: block all options, case-sensitive */ + f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL; + do { + if ((p = strchr(s, ','))) + *p = '\0'; + /* match option */ + inverse = 0; + if (*s == '~') { + inverse = 1; + s++; + } + if ((values = strchr(s, '='))) + *(values) = '\0'; + key.name = s; + + ft = bsearch(&key, &filtertypes, + sizeof(filtertypes) / sizeof(*filtertypes), sizeof(*filtertypes), + filtertype_cmp); + + /* restore NUL-terminator for domain= option */ + if (values) + *(values++) = '='; + + if (ft) { + if (inverse) + f->block &= ~(ft->type); + else + f->block |= ft->type; + if (ft->fn && values) + ft->fn(f, values); + } else { + /* DEBUG */ + fprintf(stderr, "unknown option: '%s' in rule: %s\n", + key.name, f->uri); + } + + /* restore ',' */ + if (p) { + *p = ','; + s = p + 1; + } + } while (p); +end: + + return 1; +} + +void +debugrule(struct filterrule *r) +{ + printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: %lu\n===\n", + r->uri ? r->uri : "", r->css ? r->css : "", r->isexception, r->block); +} + +struct filterrule * +loadrules(FILE *fp) +{ + char *line = NULL; + size_t linesiz = 0; + ssize_t n; + struct filterrule f, *r, *rn = NULL, *rules = NULL; + + /* TODO: handle ferror() */ + /* load rules */ + while ((n = getline(&line, &linesiz, fp)) > 0) { + if (line[n - 1] == '\n') + line[--n] = '\0'; + if (n > 0 && line[n - 1] == '\r') + line[--n] = '\0'; + + if (parserule(&f, line)) { + r = ecalloc(1, sizeof(struct filterrule)); + if (!rules) + rules = rn = r; + else + rn = rn->next = r; + memcpy(rn, &f, sizeof(struct filterrule)); + r->line = estrdup(line); /* DEBUG */ + } + } + return rules; +} + +Page * +newpage(WebKitWebPage *page) +{ + Page *p; + + p = ecalloc(1, sizeof(Page)); + p->next = pages; + pages = p; + + p->id = webkit_web_page_get_id(page); + p->webpage = page; + + return p; +} + +static void +documentloaded(WebKitWebPage *wp, Page *p) +{ + WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp); + WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc); + WebKitDOMElement *el; + char sitecss[1000000] = ""; /* TODO: dynamic allocate */ + struct filterrule *r; + char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage)); + char *domain, *s; + + /* TODO: improve, hacky */ + if ((domain = strstr(uri, "://"))) { + domain += sizeof("://") - 1; + } else { + domain = uri; + } + if ((s = strchr(domain, '/'))) + *s = '\0'; + + printf("uri: %s\n", uri); + printf("domain: %s\n", domain); + + /* site-specific CSS */ + for (r = rules; r; r = r->next) { + if (!r->css || !r->domains || !matchrule(r, "", "", domain)) + continue; + strlcat(sitecss, r->css, sizeof(sitecss)); + strlcat(sitecss, "{display:none;}", sizeof(sitecss)); + } + printf("sitecss: %s\n", sitecss); + + p->view = webkit_dom_document_get_default_view(doc); + + el = webkit_dom_document_create_element(doc, "style", NULL); + webkit_dom_element_set_attribute(el, "type", "text/css", NULL); + webkit_dom_element_set_inner_html(el, globalcss, NULL); + webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el), NULL); + + el = webkit_dom_document_create_element(doc, "style", NULL); + webkit_dom_element_set_attribute(el, "type", "text/css", NULL); + webkit_dom_element_set_inner_html(el, sitecss, NULL); + webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el), NULL); + + free(uri); +} + +static gboolean +sendrequest(WebKitWebPage *wp, WebKitURIRequest *req, + WebKitURIResponse *res, Page *p) +{ + struct filterrule *r; + + char *uri = estrdup((char *)webkit_web_page_get_uri(p->webpage)); + char *requri = estrdup((char *)webkit_uri_request_get_uri(req)); + char *domain, *s; + + /* TODO: improve, hacky */ + if ((domain = strstr(uri, "://"))) { + domain += sizeof("://") - 1; + } else { + domain = uri; + } + + if ((s = strchr(domain, '/'))) + *s = '\0'; + + /* match rules */ + for (r = rules; r; r = r->next) { + if (!r->css && matchrule(r, requri, "csio^", domain)) { + printf("requri: %s\n", requri); + printf("uri: %s\n", uri); + printf("domain: %s\n", domain); + + fprintf(stderr, "blocked: %s, %s\n", domain, requri); + free(uri); + free(requri); + return TRUE; + } + } + free(uri); + free(requri); + + return FALSE; +} + +static void +objectcleared(WebKitScriptWorld *w, WebKitWebPage *wp, WebKitFrame *f, Page *p) +{ +} + +static void +webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused) +{ + Page *np = newpage(p); + WebKitScriptWorld *w = webkit_script_world_get_default(); + + g_signal_connect(p, "send-request", + G_CALLBACK(sendrequest), np); + g_signal_connect(w, "window-object-cleared", + G_CALLBACK(objectcleared), np); + g_signal_connect(p, "document-loaded", + G_CALLBACK(documentloaded), np); +} + +G_MODULE_EXPORT void +webkit_web_extension_initialize_with_user_data(WebKitWebExtension *e, GVariant *gv) +{ + struct filterrule *r; + FILE *fp; + const char *filename = "/home/hiltjo/adblock_rules"; + + if (!(fp = fopen(filename, "r"))) { + fprintf(stderr, "cannot read rules from file: %s\n", filename); + return; + } + if (!(rules = loadrules(fp))) { + fprintf(stderr, "cannot load adblock rules\n"); + return; + } + fclose(fp); + + /* general rules: all sites */ + for (r = rules; r; r = r->next) { + if (!r->css || r->domains) + continue; + strlcat(globalcss, r->css, sizeof(globalcss)); + strlcat(globalcss, "{display:none;}", sizeof(globalcss)); + } + + g_signal_connect(e, "page-created", + G_CALLBACK(webpagecreated), NULL); +}