// This program builds a base password dictionary: it generates
// "surname + birthday" candidates, merges many wordlist files into a single
// list, and de-duplicates and sorts the result.
//
// Background (from the accompanying article): the merge was first written in
// Python, which turned out to be too slow, so it was rewritten in Go. The
// author was not fluent in Go and produced parts of the code with AI help.
//
// Usage: edit aDir and outputFile in main (they may simply point at the
// current directory), then run the program.
//
// The original article allows reposting that follows the author's reposting
// rules.
package main

import (
	"bufio"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode"
	"unicode/utf8"
)

// ANSI escape sequences used to colour console output.
const (
	RED    = "\033[91m"
	GREEN  = "\033[92m"
	YELLOW = "\033[93m"
	RESET  = "\033[0m"
)

// Tuning knobs for the merge.
const (
	// dedupeEvery is how many processed files trigger an intermediate
	// de-duplication pass, which keeps the in-memory list from growing
	// unboundedly when inputs overlap heavily.
	dedupeEvery = 50
	// maxLineBytes is the longest input line the scanner accepts. The bufio
	// default (64 KiB) aborts the whole file on the first longer line.
	maxLineBytes = 1 << 20
)

var (
	// excludeExts lists lower-case file extensions that are never treated as
	// wordlists.
	excludeExts = map[string]struct{}{
		".py": {}, ".bak": {}, ".cmd": {}, ".bat": {}, ".sh": {}, ".vbs": {},
		".ps1": {}, ".js": {}, ".html": {}, ".htm": {}, ".php": {}, ".sql": {},
		".conf": {}, ".ini": {}, ".log": {}, ".md": {}, ".xlsx": {}, ".xls": {},
		".docx": {}, ".doc": {}, ".pptx": {}, ".ppt": {}, ".pdf": {}, ".jpg": {},
		".jpeg": {}, ".png": {}, ".gif": {}, ".bmp": {}, ".ico": {}, ".exe": {},
	}

	// emailRegex matches anything that looks like an e-mail address. It is
	// compiled once here instead of once per processed file.
	emailRegex = regexp.MustCompile(`(?i)[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}`)

	// htmlEntityReplacer decodes the HTML entities that show up in scraped
	// wordlists. It works in a single pass, so "&amp;lt;" becomes "&lt;"
	// rather than being decoded twice.
	//
	// NOTE(review): the published listing of this table was itself
	// HTML-decoded by the web page, so the entity spellings below are
	// reconstructed from the visible replacement values (& < > " ' !). The
	// listing also showed two leading replacements that mapped to the empty
	// string; their patterns are unrecoverable and are not reproduced here.
	// Confirm against the author's original file if available.
	htmlEntityReplacer = strings.NewReplacer(
		"&amp;", "&",
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", "\"",
		"&#34;", "\"",
		"&apos;", "'",
		"&#39;", "'",
		"&excl;", "!",
		"&#33;", "!",
	)
)

// generateDateBasedPasswords returns prefix+YYYYMMDD for every prefix and for
// each of the last `days` days, counting back from today (today included).
// It returns nil when days is not positive or prefixes is empty.
func generateDateBasedPasswords(days int, prefixes []string) []string {
	if days <= 0 || len(prefixes) == 0 {
		// A negative capacity would make the allocation below panic.
		return nil
	}
	now := time.Now()
	passwords := make([]string, 0, days*len(prefixes))
	for i := 0; i < days; i++ {
		date := now.AddDate(0, 0, -i).Format("20060102")
		for _, prefix := range prefixes {
			passwords = append(passwords, prefix+date)
		}
	}
	return passwords
}

// filterAllowedChars keeps only the visible ASCII characters (codes 33..126)
// of s. Spaces (code 32) are dropped too; widen the range check below if they
// should be kept.
func filterAllowedChars(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		if r >= 33 && r <= 126 {
			b.WriteRune(r)
		}
	}
	return b.String()
}

// getFileSize returns the size in bytes of the file at path.
func getFileSize(path string) (int64, error) {
	info, err := os.Stat(path)
	if err != nil {
		return 0, err
	}
	return info.Size(), nil
}

// humanFileSize formats a byte count with a binary (1024-based) unit and two
// decimals, e.g. 1536 -> "1.50KB".
func humanFileSize(size int64) string {
	const unit = 1024
	if size < unit {
		return fmt.Sprintf("%.2fB", float64(size))
	}
	// "EB" is the largest unit an int64 byte count can reach, so indexing
	// this table can never go out of range.
	units := [...]string{"KB", "MB", "GB", "TB", "PB", "EB"}
	div, exp := int64(unit), 0
	for n := size / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.2f%s", float64(size)/float64(div), units[exp])
}

// cleanEdgesIfNeeded returns "" for blank input and returns s unchanged when
// both its first and last characters are printable. Otherwise it trims
// surrounding whitespace and removes every non-printable character.
func cleanEdgesIfNeeded(s string) string {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" {
		return ""
	}
	// Decode whole runes: looking at s[0] / s[len(s)-1] would classify a
	// single byte of a multi-byte UTF-8 character as if it were a code point.
	first, _ := utf8.DecodeRuneInString(s)
	last, _ := utf8.DecodeLastRuneInString(s)
	if isPrintable(first) && isPrintable(last) {
		return s
	}
	var b strings.Builder
	b.Grow(len(trimmed))
	for _, r := range trimmed {
		if isPrintable(r) {
			b.WriteRune(r)
		}
	}
	return b.String()
}

// isPrintable reports whether r is a printable character. Control characters,
// including vertical tab (\x0b) and form feed (\x0c), are not printable; the
// ASCII space is.
func isPrintable(r rune) bool {
	return unicode.IsPrint(r)
}

// normalizeLine applies the per-line clean-up used by processFile. It returns
// the cleaned candidate and true, or "" and false when the line must be
// skipped. minLength is inclusive, maxLength is exclusive.
func normalizeLine(raw string, minLength, maxLength int) (string, bool) {
	line := strings.TrimSpace(raw)
	if len(line) < minLength {
		return "", false
	}

	// Strip one pair of matching surrounding quotes.
	if len(line) > 1 {
		singleQuoted := strings.HasPrefix(line, "'") && strings.HasSuffix(line, "'")
		doubleQuoted := strings.HasPrefix(line, "\"") && strings.HasSuffix(line, "\"")
		if singleQuoted || doubleQuoted {
			line = line[1 : len(line)-1]
		}
	}

	// Decode HTML entities left over from scraped sources.
	if strings.Contains(line, "&") {
		line = htmlEntityReplacer.Replace(line)
	}

	// Lines containing an e-mail address are not passwords.
	if emailRegex.MatchString(line) {
		return "", false
	}

	line = cleanEdgesIfNeeded(line)
	line = filterAllowedChars(line)
	if len(line) < minLength || len(line) >= maxLength {
		return "", false
	}
	return line, true
}

// processFile extracts password candidates from the text file at filePath.
//
// Each line is trimmed, unquoted, HTML-entity decoded and reduced to visible
// ASCII; lines containing an e-mail address are skipped. A candidate is kept
// when minLength <= len < maxLength.
//
// It returns the candidates, the file's base name, its human-readable size and
// the elapsed seconds formatted with two decimals. When the file cannot be
// opened it returns nil, a message starting with "Error opening", "0.00" and
// "". A read error part-way through is reported on stderr and the candidates
// collected so far are still returned.
func processFile(filePath string, minLength, maxLength int) ([]string, string, string, string) {
	start := time.Now()
	name := filepath.Base(filePath)

	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Sprintf("Error opening %s: %v", name, err), "0.00", ""
	}
	defer file.Close()

	var passwords []string
	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for scanner.Scan() {
		if line, ok := normalizeLine(scanner.Text(), minLength, maxLength); ok {
			passwords = append(passwords, line)
		}
	}
	if err := scanner.Err(); err != nil {
		// Without this check an over-long line or I/O error would silently
		// drop the rest of the file.
		fmt.Fprintf(os.Stderr, " [!] Error reading %s: %v\n", name, err)
	}

	// The size is only displayed; if it cannot be read, show zero.
	size, err := getFileSize(filePath)
	if err != nil {
		size = 0
	}
	elapsed := time.Since(start).Seconds()
	return passwords, name, humanFileSize(size), fmt.Sprintf("%.2f", elapsed)
}

// formatWithComma renders n in decimal with a comma between every group of
// three digits, e.g. 1234567 -> "1,234,567" and -1234 -> "-1,234".
func formatWithComma(n int) string {
	digits := strconv.Itoa(n)
	sign := ""
	if digits[0] == '-' {
		// Keep the sign out of the grouping so "-123" does not become "-,123".
		sign, digits = "-", digits[1:]
	}
	if len(digits) <= 3 {
		return sign + digits
	}

	var b strings.Builder
	b.Grow(len(sign) + len(digits) + (len(digits)-1)/3)
	b.WriteString(sign)
	head := len(digits) % 3
	if head == 0 {
		head = 3
	}
	b.WriteString(digits[:head])
	for i := head; i < len(digits); i += 3 {
		b.WriteByte(',')
		b.WriteString(digits[i : i+3])
	}
	return b.String()
}

// reverseString returns s with its runes in reverse order.
func reverseString(s string) string {
	runes := []rune(s)
	for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
		runes[i], runes[j] = runes[j], runes[i]
	}
	return string(runes)
}

// inputFile is a candidate wordlist together with its size at scan time.
type inputFile struct {
	path string
	size int64
}

// collectInputFiles lists the regular entries directly inside root (it does
// not descend into sub-directories), skips excluded extensions and returns the
// paths ordered by ascending size. If the walk fails, the paths gathered so
// far are returned along with the error so the caller can continue
// best-effort.
func collectInputFiles(root string) ([]string, error) {
	var files []inputFile
	walkErr := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			if path != root {
				return filepath.SkipDir
			}
			return nil
		}
		ext := strings.ToLower(filepath.Ext(path))
		if _, excluded := excludeExts[ext]; excluded {
			return nil
		}
		// Record the size once here; calling stat inside the sort comparator
		// costs a system call per comparison. An unreadable size sorts as 0.
		var size int64
		if info, infoErr := d.Info(); infoErr == nil {
			size = info.Size()
		}
		files = append(files, inputFile{path: path, size: size})
		return nil
	})

	sort.Slice(files, func(i, j int) bool {
		if files[i].size != files[j].size {
			return files[i].size < files[j].size
		}
		return files[i].path < files[j].path
	})

	paths := make([]string, len(files))
	for i := range files {
		paths[i] = files[i].path
	}
	return paths, walkErr
}

// fileResult carries what processFile returned for one input file.
type fileResult struct {
	passwords []string
	info      string
	size      string
	usetime   string
	index     int
}

// startWorkers processes filePaths with at most `workers` files in flight and
// streams the results on the returned channel, which is closed after the last
// file. Dispatching runs in its own goroutine so the caller can consume
// results while later files are still being read.
func startWorkers(filePaths []string, workers int) <-chan fileResult {
	if workers < 1 {
		workers = 1
	}
	results := make(chan fileResult, workers)
	go func() {
		var wg sync.WaitGroup
		sem := make(chan struct{}, workers)
		for i, path := range filePaths {
			wg.Add(1)
			sem <- struct{}{}
			go func(idx int, p string) {
				defer wg.Done()
				defer func() { <-sem }()
				passwds, info, size, usetime := processFile(p, 8, 32)
				results <- fileResult{passwds, info, size, usetime, idx}
			}(i, path)
		}
		wg.Wait()
		close(results)
	}()
	return results
}

// writeLines writes lines to path separated by "\n" (no trailing newline),
// streaming through a buffer instead of building the whole file in memory.
func writeLines(path string, lines []string) (err error) {
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriterSize(f, 1<<20)
	for i, line := range lines {
		if i > 0 {
			if err = w.WriteByte('\n'); err != nil {
				return err
			}
		}
		if _, err = w.WriteString(line); err != nil {
			return err
		}
	}
	return w.Flush()
}

func main() {
	startTime := time.Now()
	aDir := `d:\tools\密码破解\Dict\`
	// NOTE: on Windows this resolves to a file inside aDir, so the output of
	// a previous run is read again as an input on the next run.
	outputFile := filepath.Join(filepath.Dir(aDir), "new.txt")
	maxWorkers := runtime.NumCPU()

	fmt.Printf("[#] Max workers: %s%d%s\n", GREEN, maxWorkers, RESET)
	fmt.Printf("[$] Start time: %s\n", time.Now().Format("2006-01-02 15:04:05"))

	fmt.Println("[#] Generating date-based passwords...")
	prefixes := []string{"", "li", "lin", "duan", "zhao", "sun", "tian", "he", "an", "guo", "zhang", "zhou", "wu", "wang", "du", "cao", "kong", "chen", "liu", "han", "tang", "ma", "zheng"}
	passwdSet := generateDateBasedPasswords(36865, prefixes)
	fmt.Printf("[#] Generated %s%s%s date+prefix passwords\n", GREEN, formatWithComma(len(passwdSet)), RESET)

	// Scan the input directory.
	fmt.Println("[#] Scanning files in:", aDir)
	filePaths, err := collectInputFiles(aDir)
	if err != nil {
		fmt.Printf("[!] Error scanning directory: %v\n", err)
	}
	fmt.Printf("[#] Total file number: %s%d%s\n", GREEN, len(filePaths), RESET)

	// Process the files concurrently.
	fmt.Println("[#] Processing files with multi-threading...")
	results := startWorkers(filePaths, maxWorkers)

	// Seed the merged list with the generated passwords; previously they were
	// generated and then discarded.
	allPasswords := make([]string, 0, len(passwdSet)+1000000)
	allPasswords = append(allPasswords, passwdSet...)
	passwdSet = nil

	duplicateSize := 0
	processed := 0
	for res := range results {
		processed++
		switch {
		case len(res.passwords) > 0:
			fmt.Printf(" [+] [%d / %d] [%s%ss%s / %s%.2fs%s], Extracted %s%d%s from %s %s\n",
				processed, len(filePaths),
				GREEN, res.usetime, RESET,
				RED, time.Since(startTime).Seconds(), RESET,
				YELLOW, len(res.passwords), RESET,
				res.info, res.size)
			allPasswords = append(allPasswords, res.passwords...)
		case strings.HasPrefix(res.info, "Error"):
			fmt.Printf(" [!] %s\n", res.info)
		}

		// Results arrive out of order, so the cadence is driven by the number
		// of files handled rather than by the file's index.
		if processed%dedupeEvery == 0 {
			before := len(allPasswords)
			allPasswords = bucketDeduplicate(allPasswords, maxWorkers)
			duplicateSize += before - len(allPasswords)
			runtime.GC()
			fmt.Printf("\r [=] Processing, Current size %s%s%s DUP:%s%s%s\n",
				GREEN, formatWithComma(len(allPasswords)), RESET,
				RED, formatWithComma(duplicateSize), RESET)
		}
	}

	runtime.GC()
	fmt.Printf("\n[#] Deduplicating... Current length %s%s%s, DUP:%s%s%s \n",
		GREEN, formatWithComma(len(allPasswords)), RESET,
		RED, formatWithComma(duplicateSize), RESET)

	before := len(allPasswords)
	passwdSet = bucketDeduplicate(allPasswords, maxWorkers)
	after := len(passwdSet)
	percentage := 0.0
	if before > 0 {
		percentage = float64(after) * 100 / float64(before)
	}
	fmt.Printf("[%s%.2fs%s] duplicated size: %s%s%s (%.2f%% kept)\n",
		GREEN, time.Since(startTime).Seconds(), RESET,
		RED, formatWithComma(before-after+duplicateSize), RESET,
		percentage)
	fmt.Printf("[#] Total unique passwords: %s%s%s\n", YELLOW, formatWithComma(len(passwdSet)), RESET)

	fmt.Printf("[#] Writing to file: %s... ", outputFile)
	writeStart := time.Now()
	if err := writeLines(outputFile, passwdSet); err != nil {
		fmt.Printf("Error writing output file: %v\n", err)
	} else {
		fmt.Printf("[%s%.2fs%s / %s%.2fs%s]\n",
			GREEN, time.Since(writeStart).Seconds(), RESET,
			RED, time.Since(startTime).Seconds(), RESET)
	}

	fmt.Printf("[$] Finished at: %s | Duration: %s%.2fs%s\n",
		time.Now().Format("2006-01-02 15:04:05"),
		GREEN, time.Since(startTime).Seconds(), RESET)
}

// Bucket layout used by bucketDeduplicate: '0'-'9', then 'A'-'Z', then
// 'a'-'z', and one final bucket for every other leading byte.
const (
	bucketCount     = 10 + 26 + 26 + 1
	otherBucketSlot = bucketCount - 1
)

// bucketSlot maps the first byte of a password to its bucket index.
func bucketSlot(first byte) int {
	switch {
	case first >= '0' && first <= '9':
		return int(first - '0')
	case first >= 'A' && first <= 'Z':
		return 10 + int(first-'A')
	case first >= 'a' && first <= 'z':
		return 36 + int(first-'a')
	default:
		return otherBucketSlot
	}
}

// bucketLabel returns the character shown for a bucket in progress output;
// '#' stands for the "other" bucket.
func bucketLabel(slot int) byte {
	switch {
	case slot < 10:
		return '0' + byte(slot)
	case slot < 36:
		return 'A' + byte(slot-10)
	case slot < otherBucketSlot:
		return 'a' + byte(slot-36)
	default:
		return '#'
	}
}

// sortedUnique sorts data in place and returns its distinct values, reusing
// data's backing array.
func sortedUnique(data []string) []string {
	sort.Strings(data)
	out := data[:0]
	for _, s := range data {
		if len(out) == 0 || out[len(out)-1] != s {
			out = append(out, s)
		}
	}
	return out
}

// bucketDeduplicate returns the distinct non-empty strings of passwords.
//
// The input is split into buckets by leading byte (digits, upper-case letters,
// lower-case letters, everything else); buckets are de-duplicated concurrently
// with at most `workers` goroutines (values below 1 are treated as 1) and
// concatenated in that bucket order. Entries within a bucket are sorted, so the
// result is deterministic. The input slice is not modified. One progress line
// per non-empty bucket is printed to stdout.
func bucketDeduplicate(passwords []string, workers int) []string {
	if workers < 1 {
		// A zero-capacity semaphore would block forever on the first send.
		workers = 1
	}

	var buckets [bucketCount][]string
	for _, p := range passwords {
		if p == "" {
			continue
		}
		slot := bucketSlot(p[0])
		buckets[slot] = append(buckets[slot], p)
	}

	// Every goroutine writes only its own element, so no lock is needed.
	var deduped [bucketCount][]string
	var wg sync.WaitGroup
	sem := make(chan struct{}, workers)
	for slot := range buckets {
		if len(buckets[slot]) == 0 {
			continue
		}
		wg.Add(1)
		sem <- struct{}{}
		go func(slot int, data []string) {
			defer wg.Done()
			defer func() { <-sem }()
			olen := len(data)
			unique := sortedUnique(data)
			deduped[slot] = unique
			fmt.Printf(" [*] Deduplicating bucket '%c'... %s%d/%d%s items (%.2f%%)\n",
				bucketLabel(slot), GREEN, len(unique), olen, RESET,
				float64(len(unique))*100/float64(olen))
		}(slot, buckets[slot])
	}
	wg.Wait()

	total := 0
	for slot := range deduped {
		total += len(deduped[slot])
	}
	finalList := make([]string, 0, total)
	for slot := range deduped {
		finalList = append(finalList, deduped[slot]...)
	}
	return finalList
}