<font>// chunk_file.go<i> package fileuploader
import ( "crypto/md5" "encoding/hex" "fmt" "io" "os" "sync" )
// ChunkFile splits a file into smaller chunks and returns metadata for each chunk.<i> // It reads the file sequentially and chunks it based on the specified chunk size.<i> func (c *DefaultFileChunker) ChunkFile(filePath string) ([]ChunkMeta, error) { var chunks []ChunkMeta // Store metadata for each chunk<i>
// Open the file for reading<i> file, err := os.Open(filePath) if err != nil { return nil, err } defer file.Close()
// Create a buffer to hold the chunk data<i> buffer := make([]byte, c.chunkSize) index := 0 // Initialize chunk index<i>
// Loop until EOF is reached<i> for { // Read chunkSize bytes from the file into the buffer<i> bytesRead, err := file.Read(buffer) if err != nil && err != io.EOF { return nil, err } if bytesRead == 0 { break // If bytesRead is 0, it means EOF is reached<i> }
// Generate a unique hash for the chunk data<i> hash := md5.Sum(buffer[:bytesRead]) hashString := hex.EncodeToString(hash[:])
// Construct the chunk file name<i> chunkFileName := fmt.Sprintf("%s.chunk.%d", filePath, index)
// Create a new chunk file and write the buffer data to it<i> chunkFile, err := os.Create(chunkFileName) if err != nil { return nil, err } _, err = chunkFile.Write(buffer[:bytesRead]) if err != nil { return nil, err }
// Append metadata for the chunk to the chunks slice<i> chunks = append(chunks, ChunkMeta{FileName: chunkFileName, MD5Hash: hashString, Index: index})
// Close the chunk file<i> chunkFile.Close()
// Move to the next chunk<i> index++ }
return chunks, nil }
// ChunklargeFile splits a large file into smaller chunks in parallel and returns metadata for each chunk.<i> // It divides the file into chunks and processes them concurrently using multiple goroutines.<i> func (c *DefaultFileChunker) ChunklargeFile(filePath string) ([]ChunkMeta, error) { var wg sync.WaitGroup var mu sync.Mutex var chunks []ChunkMeta // Store metadata for each chunk<i>
// Open the file for reading<i> file, err := os.Open(filePath) if err != nil { return nil, err } defer file.Close()
// Get file information to determine the number of chunks<i> fileInfo, err := file.Stat() if err != nil { return nil, err }
numChunks := int(fileInfo.Size() / int64(c.chunkSize)) if fileInfo.Size()%int64(c.chunkSize) != 0 { numChunks++ }
// Create channels to communicate between goroutines<i> chunkChan := make(chan ChunkMeta, numChunks) errChan := make(chan error, numChunks) indexChan := make(chan int, numChunks)
// Populate the index channel with chunk indices<i> for i := 0; i < numChunks; i++ { indexChan <- i } close(indexChan)
// Start multiple goroutines to process chunks in parallel<i> for i := 0; i < 4; i++ { // Number of parallel workers<i> wg.Add(1) go func() { defer wg.Done() for index := range indexChan { // Calculate the offset for the current chunk<i> offset := int64(index) * int64(c.chunkSize) buffer := make([]byte, c.chunkSize) // Create a buffer for chunk data<i>
// Seek to the appropriate position in the file<i> file.Seek(offset, 0)
// Read chunkSize bytes from the file into the buffer<i> bytesRead, err := file.Read(buffer) if err != nil && err != io.EOF { errChan <- err return }
// If bytesRead is 0, it means EOF is reached<i> if bytesRead > 0 { // Generate a unique hash for the chunk data<i> hash := md5.Sum(buffer[:bytesRead]) hashString := hex.EncodeToString(hash[:])
// Construct the chunk file name<i> chunkFileName := fmt.Sprintf("%s.chunk.%d", filePath, index)
// Create a new chunk file and write the buffer data to it<i> chunkFile, err := os.Create(chunkFileName) if err != nil { errChan <- err return } _, err = chunkFile.Write(buffer[:bytesRead]) if err != nil { errChan <- err return }
// Append metadata for the chunk to the chunks slice<i> chunk := ChunkMeta{ FileName: chunkFileName, MD5Hash: hashString, Index: index, } mu.Lock() chunks = append(chunks, chunk) mu.Unlock()
// Close the chunk file<i> chunkFile.Close()
// Send the processed chunk to the chunk channel<i> chunkChan <- chunk } } }() }
// Wait for all goroutines to finish<i> go func() { wg.Wait() close(chunkChan) close(errChan) }()
// Check for errors from goroutines<i> for err := range errChan { if err != nil { return nil, err } }
return chunks, nil }
|