Go Programming Language Cookbook/Printable version
Appearance
This is the print version of Go Programming Language Cookbook. You won't see this message or any elements not part of the book's content when you print or preview this page.
Go Programming Language Cookbook
The current, editable version of this book is available in Wikibooks, the open-content textbooks collection, at
https://en.wikibooks.org/wiki/Go_Programming_Language_Cookbook
Permission is granted to copy, distribute, and/or modify this document under the terms of the Creative Commons Attribution-ShareAlike 3.0 License.
Bit map
Given a big file of IPv4 addresses (more than 100 GB), we need to count the unique addresses. If we use a generic map[string]bool,
we will need more than 64 GB of RAM, so let's use a bit map instead:
package main
import (
"bufio"
"fmt"
"math/bits"
"os"
)
// bitsetSize is the bitmap length in bytes: one bit for each of the 2^32
// possible IPv4 addresses, i.e. 2^32 / 8 = 2^29 bytes (512 MiB).
const bitsetSize = (1 << 32) / 8
func main() {
file, err := os.Open("ip_addresses")
if err != nil {
fmt.Println("Error opening file:", err)
return
}
defer file.Close()
bitset := [bitsetSize]byte{}
// Use a buffered scanner with a larger buffer
scanner := bufio.NewScanner(file)
const maxBuffer = 64 * 1024 // 64 KB buffer
buf := make([]byte, 0, maxBuffer)
scanner.Buffer(buf, maxBuffer)
// Process each line
for scanner.Scan() {
line := scanner.Bytes()
// Parse the IP address manually from bytes
ip := parseIPv4(line)
// Set the bit
byteIndex := ip >> 3 // Divide by 8
bitIndex := ip & 7 // Bit position 0-7
bitset[byteIndex] |= 1 << bitIndex
}
// Check for scanning errors
if err := scanner.Err(); err != nil {
fmt.Println("Error reading file:", err)
return
}
var count uint64
for i := 0; i < bitsetSize; i++ {
count += uint64(bits.OnesCount8(bitset[i]))
}
fmt.Println("Number of unique IPv4 addresses:", count)
}
// parseIPv4 converts a dotted-decimal IPv4 address such as "192.168.0.1"
// into its 32-bit numeric form, with the first octet in the most significant
// byte.
//
// It works directly on the byte slice so that no string is allocated per
// line. The input must consist of exactly four decimal octets in the range
// 0-255 separated by single dots, with nothing before or after them; leading
// zeros inside an octet are accepted.
//
// The signature has no error result, so malformed input makes parseIPv4
// panic with a message quoting the offending text rather than failing with a
// bare index-out-of-range or returning a silently wrong address.
func parseIPv4(line []byte) (ip uint32) {
	ip, ok := decodeIPv4(line)
	if !ok {
		panic(fmt.Sprintf("parseIPv4: malformed IPv4 address %q", line))
	}
	return ip
}

// decodeIPv4 is the validating single-pass parser behind parseIPv4. It
// returns the numeric address and true for well-formed input, or 0 and false
// when line is not four dot-separated decimal octets in the range 0-255.
func decodeIPv4(line []byte) (ip uint32, ok bool) {
	var (
		octet  uint32 // value of the octet currently being read
		digits int    // number of digits seen in the current octet
		dots   int    // number of separators consumed so far
	)
	for _, c := range line {
		switch {
		case '0' <= c && c <= '9':
			octet = octet*10 + uint32(c-'0')
			if octet > 255 {
				// Rejecting here also keeps octet from ever overflowing.
				return 0, false
			}
			digits++
		case c == '.':
			// A dot must follow at least one digit, and only three are allowed.
			if digits == 0 || dots == 3 {
				return 0, false
			}
			ip = ip<<8 | octet
			octet, digits = 0, 0
			dots++
		default:
			return 0, false
		}
	}
	// The address must end inside a non-empty fourth octet.
	if digits == 0 || dots != 3 {
		return 0, false
	}
	return ip<<8 | octet, true
}