Last active 6 months ago

Go lib to cope with BPjM lists

bpjm.go Raw
1// Go library to cope with the BPjM censorship list
2
3package Bpjm
4
5import (
6 "net/url"
7 "regexp"
8 "crypto/md5"
9 "strings"
10 "bytes"
11 "io/ioutil"
12 "encoding/binary"
13 "io"
14)
15
// BpjmEntry is the in-memory form of one BPjM list record: two MD5 digests
// plus a depth value.
type BpjmEntry struct {
	// DomainMd5 is the MD5 sum of the domain with a leading "http://";
	// PathMd5 is the MD5 sum of the path.
	DomainMd5, PathMd5 []byte
	// Depth is the depth value associated with the entry.
	Depth int32
}
22
// ParseURL parses the URL string ustr with url.Parse and hands back the
// resulting *url.URL together with whatever error url.Parse reported.
func ParseURL(ustr string) (u *url.URL, err error) {
	return url.Parse(ustr)
}
28
// XtrctHost returns the Host field of the given URL object, unchanged.
func XtrctHost(u *url.URL) (host string) {
	host = u.Host
	return host
}
33
// XtrctPath returns the Path field of the given URL object, unchanged.
func XtrctPath(u *url.URL) (path string) {
	path = u.Path
	return path
}
38
// wwwPrefix matches a literal leading "www." label. The dot is escaped so
// that hosts such as "wwwx.example.com" are not mangled, and the pattern is
// compiled once instead of on every FilterHost call.
var wwwPrefix = regexp.MustCompile(`^www\.`)

// FilterHost filters host according to the BPjM host filter: it cuts away a
// single leading "www." subdomain if one is present and returns every other
// host unchanged.
func FilterHost(host string) (fhost string) {
	return wwwPrefix.ReplaceAllString(host, "")
}
46
// FilterPath filters path according to the BPjM path filter: it cuts away
// one leading "/" (the root directory) if present and returns the rest of
// the path unchanged. Only the first slash is removed.
func FilterPath(path string) (fpath string) {
	// A plain prefix trim replaces the regular expression that used to be
	// compiled on every call; the result is the same.
	return strings.TrimPrefix(path, "/")
}
54
// GenMd5 generates the MD5 checksum of the given string and returns the
// 16-byte digest as a byte slice.
func GenMd5(in string) (md5sum []byte) {
	// md5.Sum returns a fixed-size array; slicing it yields the []byte the
	// callers expect without copying byte by byte. The local is not named
	// "md5" so that it does not shadow the imported package.
	digest := md5.Sum([]byte(in))
	return digest[:]
}
64
// check is the package's general error guard: a nil error is a no-op, any
// other error is passed to panic.
func check(err error) {
	if err == nil {
		return
	}
	panic(err)
}
71
// EnumerateDepth returns the depth of the given URL, defined as the number
// of "/"-separated pieces of u.Path. Empty pieces count as well, so "/a/b"
// has depth 3 and an empty path has depth 1.
func EnumerateDepth(u *url.URL) (depth int32) {
	// Splitting on "/" yields exactly one more piece than there are slashes.
	slashes := strings.Count(u.Path, "/")
	return int32(slashes + 1)
}
78
79// UrlToBpjmEntry - takes an URL string as input and returns a BpjmEntry object
80func UrlToBpjmEntry(ustr string) (entry BpjmEntry) {
81 u, err := ParseURL(ustr)
82 check(err)
83 host := FilterHost( XtrctHost(u) )
84 path := FilterPath( XtrctPath(u) )
85 depth := EnumerateDepth(u)
86 e := BpjmEntry{
87 GenMd5("http://" + host),
88 GenMd5(path),
89 depth,
90 }
91 return e
92}
93
// LoadFritzBoxFile reads a binary BPjM list file, as found on a Fritzbox,
// from filename and returns its raw bytes.
//
// Any error from ioutil.ReadFile is discarded: the caller simply receives
// whatever bytes ReadFile returned, so an unreadable file cannot be told
// apart from an empty one.
func LoadFritzBoxFile(filename string) (data []byte) {
	// The error is dropped on purpose; the signature offers no way to
	// report it to the caller.
	contents, _ := ioutil.ReadFile(filename)
	return contents
}
99
100// FritzBoxFile - datastructure to describe an Fritzbox BPjM File
101type FritzBoxFile struct {
102 Magick []byte
103 EmbedFileName string
104 Size int
105 Records int
106 Entries []BpjmEntry
107}
108
109func ParseFritzBoxFile(data []byte)(FBF FritzBoxFile){
110 // set up a buffer we can work with
111 buffer := bytes.NewBuffer(data)
112 // get magick; first 5 bytes
113 magick := buffer.Next(5)
114 // get embedded filename; next 59 bytes; cut out the null bytes and make it a string
115 embedFileName := string( bytes.Trim(buffer.Next(59), string(0x00)) )
116 // get size of file in bytes
117 size := buffer.Len()
118 // prepager a list to store entries
119 var Entries []BpjmEntry
120 // init counter
121 n := 0
122 // for each entry in list...
123 for {
124 // ... read domain hash
125 d := make([]byte, 16)
126 _, err := buffer.Read(d)
127 if err != nil {
128 if err == io.EOF {
129 break
130 }
131 panic(err)
132 }
133 // ...read path hash
134 p := make([]byte, 16)
135 _, err = buffer.Read(p)
136 if err != nil {
137 if err == io.EOF {
138 break
139 }
140 panic(err)
141 }
142 //...read depth
143 i, err := binary.ReadUvarint(buffer)
144 if err != nil {
145 if err == io.EOF {
146 break
147 }
148 panic(err)
149 }
150 // parse read data into an BpjmEntry object
151 e := BpjmEntry{
152 d,
153 p,
154 int32(i),
155 }
156 // append object to list of objects
157 Entries = append(Entries, e)
158 // count
159 n++
160 }
161 // parse collected data into a FirtzBoxFile Object
162 FBF = FritzBoxFile{
163 magick,
164 embedFileName,
165 size,
166 n,
167 Entries,
168 }
169 // return the FBF Object
170 return FBF
171}