在现代Web应用中,处理大文件上传是一个常见但颇具挑战性的需求。传统的文件上传方式在面对GB级别的大文件时,往往会遇到以下问题:内存占用过高、网络波动导致整次上传失败后必须从头重传、请求超时,以及无法向用户反馈准确的进度。
针对这些问题,分片上传与断点续传技术应运而生。分片上传将大文件切割成多个小块(如10MB/片),分别上传到服务器;断点续传则记录上传进度,当网络中断后可以从中断处继续上传,而不是重新开始。
前端是整个上传流程的起点,负责文件的分片处理和进度管理。以下是关键实现步骤:
javascript复制// 计算文件分片信息
// Build an upload-task descriptor for `file` split into `chunkSize`-byte pieces.
// Returns {file, totalSize, chunkSize, totalChunks, chunkIndex}; chunkIndex
// starts at 0 and tracks the next chunk to upload.
function calculateChunks(file, chunkSize) {
  const pieces = Math.ceil(file.size / chunkSize);
  return {
    file: file,
    totalSize: file.size,
    chunkSize: chunkSize,
    totalChunks: pieces,
    chunkIndex: 0
  };
}
javascript复制// 读取并上传分片
// Slice out the chunk at task.chunkIndex, POST it to /upload as multipart
// form data, then advance the index to the next chunk.
async function uploadChunk(task) {
  const offset = task.chunkIndex * task.chunkSize;
  const limit = Math.min(offset + task.chunkSize, task.totalSize);
  const piece = task.file.slice(offset, limit);
  const body = new FormData();
  body.append('file', piece);
  body.append('chunkIndex', task.chunkIndex);
  body.append('totalChunks', task.totalChunks);
  await axios.post('/upload', body);
  task.chunkIndex += 1;
}
javascript复制// 更新上传进度
// Log the cumulative upload percentage for `task`, capped at 100%.
function updateProgress(task) {
  const bytesDone = task.chunkIndex * task.chunkSize;
  let percent = (bytesDone / task.totalSize) * 100;
  if (percent > 100) percent = 100;
  console.log(`上传进度: ${percent.toFixed(2)}%`);
}
后端需要处理分片上传、临时存储和最终合并。以下是Java实现的关键点:
java复制@PostMapping("/upload")
public ResponseEntity<String> uploadChunk(
@RequestParam("file") MultipartFile file,
@RequestParam("chunkIndex") int chunkIndex,
@RequestParam("totalChunks") int totalChunks) {
// 存储分片到临时目录
String tempDir = "/tmp/upload/" + file.getOriginalFilename();
File chunkFile = new File(tempDir, "chunk-" + chunkIndex);
file.transferTo(chunkFile);
// 记录上传进度
saveUploadProgress(file.getOriginalFilename(), chunkIndex, totalChunks);
return ResponseEntity.ok("分片上传成功");
}
java复制public void mergeChunks(String fileName, int totalChunks) throws IOException {
String tempDir = "/tmp/upload/" + fileName;
File outputFile = new File("/data/uploads/" + fileName);
try (FileOutputStream fos = new FileOutputStream(outputFile);
BufferedOutputStream bos = new BufferedOutputStream(fos)) {
for (int i = 0; i < totalChunks; i++) {
File chunkFile = new File(tempDir, "chunk-" + i);
try (FileInputStream fis = new FileInputStream(chunkFile);
BufferedInputStream bis = new BufferedInputStream(fis)) {
byte[] buffer = new byte[8192];
int bytesRead;
while ((bytesRead = bis.read(buffer)) != -1) {
bos.write(buffer, 0, bytesRead);
}
}
chunkFile.delete(); // 合并后删除分片
}
}
// 删除临时目录
new File(tempDir).delete();
}
断点续传的核心在于准确记录上传进度,并在中断后能够恢复。我们采用双重存储策略:
javascript复制// 保存上传进度到localStorage
// Persist a snapshot of the task's progress in localStorage under the key
// "upload_<fileName>", so an interrupted upload can resume in this browser.
function saveProgressToLocal(task) {
  const theFile = task.file;
  const snapshot = {
    fileName: theFile.name,
    fileSize: theFile.size,
    chunkSize: task.chunkSize,
    totalChunks: task.totalChunks,
    uploadedChunks: task.chunkIndex,
    lastModified: theFile.lastModified
  };
  localStorage.setItem(`upload_${theFile.name}`, JSON.stringify(snapshot));
}
// 从localStorage恢复进度
// Load saved progress for `file` from localStorage. Returns null when no
// record exists or when the file has changed since the record was written
// (size or lastModified mismatch), which would make the record unsafe.
function restoreProgressFromLocal(file) {
  const saved = localStorage.getItem(`upload_${file.name}`);
  if (!saved) return null;
  const progress = JSON.parse(saved);
  const stillSameFile =
    progress.fileSize === file.size &&
    progress.lastModified === file.lastModified;
  return stillSameFile ? progress : null;
}
// JPA entity persisting per-task upload progress server-side, so an
// interrupted upload can resume even from another browser/device.
// Mirrors the `upload_progress` table defined in the SQL section.
@Entity
@Table(name = "upload_progress")
public class UploadProgress {
@Id
private String taskId; // upload task id (primary key)
private String fileName; // original file name
private String filePath; // storage path of the file
private long fileSize; // total file size in bytes
private int chunkSize; // size of one chunk in bytes
private int totalChunks; // total number of chunks
private int uploadedChunks; // chunks uploaded so far
private Date lastUpdate; // last progress update time
// getters and setters
}
完整的断点续传流程如下:
// Return server-side upload progress so the client can resume from it.
// NOTE(review): returning null produces an empty 200 body; the JS caller
// treats a falsy response as "no progress" — confirm that contract holds.
@GetMapping("/progress")
public UploadProgress checkProgress(
@RequestParam("fileName") String fileName,
@RequestParam("fileSize") long fileSize) {
// Match on file name AND size to avoid colliding same-named, different files
return progressRepository.findByFileNameAndFileSize(fileName, fileSize)
.orElse(null);
}
// Resume an interrupted upload, falling back to a fresh upload whenever no
// trustworthy progress record exists on BOTH sides.
// Fix vs. original: the server-side chunkSize must match the locally saved
// one — resuming with a different chunk size would misalign every chunk
// boundary and corrupt the merged file.
async function resumeUpload(file) {
  // Locally saved progress (validated against size/mtime by the helper).
  const localProgress = restoreProgressFromLocal(file);
  if (!localProgress) return startNewUpload(file);
  // Cross-check with the server's record.
  const serverProgress = await checkServerProgress(file.name, file.size);
  if (!serverProgress) return startNewUpload(file);
  // Chunk boundaries must agree, otherwise resuming is unsafe.
  if (serverProgress.chunkSize !== undefined &&
      serverProgress.chunkSize !== localProgress.chunkSize) {
    return startNewUpload(file);
  }
  // Take the smaller counter so no chunk is skipped on disagreement.
  const progress = Math.min(localProgress.uploadedChunks,
                            serverProgress.uploadedChunks);
  // Continue from the checkpoint.
  const task = createUploadTask(file, localProgress.chunkSize);
  task.chunkIndex = progress;
  return uploadChunks(task);
}
javascript复制// 控制并发上传数
async function uploadWithConcurrency(task, concurrency = 3) {
const promises = [];
while (task.chunkIndex < task.totalChunks) {
if (promises.length < concurrency) {
promises.push(uploadChunk(task));
task.chunkIndex++;
} else {
await Promise.race(promises);
promises = promises.filter(p => !p.isCompleted);
}
}
await Promise.all(promises);
}
java复制// 使用流式处理避免内存溢出
@PostMapping("/upload-stream")
public void uploadStream(@RequestParam("file") MultipartFile file) {
try (InputStream is = file.getInputStream();
OutputStream os = new FileOutputStream("/data/uploads/" + file.getOriginalFilename())) {
byte[] buffer = new byte[8192];
int bytesRead;
while ((bytesRead = is.read(buffer)) != -1) {
os.write(buffer, 0, bytesRead);
}
}
}
java复制// 文件完整性校验
public boolean verifyFile(File file, String expectedMd5) {
try (InputStream is = new FileInputStream(file)) {
String actualMd5 = DigestUtils.md5Hex(is);
return expectedMd5.equals(actualMd5);
}
}
javascript复制// 前端加密分片
async function encryptChunk(chunk, key) {
const arrayBuffer = await chunk.arrayBuffer();
const wordArray = CryptoJS.lib.WordArray.create(arrayBuffer);
return CryptoJS.AES.encrypt(wordArray, key).toString();
}
java复制// 后端解密
public byte[] decryptChunk(String encrypted, String key) {
byte[] encryptedBytes = encrypted.getBytes(StandardCharsets.UTF_8);
Cipher cipher = Cipher.getInstance("AES");
cipher.init(Cipher.DECRYPT_MODE, new SecretKeySpec(key.getBytes(), "AES"));
return cipher.doFinal(encryptedBytes);
}
properties复制# 上传配置
# Multipart limits: must be at least one chunk in size; 10GB also allows a
# whole file through /upload-stream.
spring.servlet.multipart.max-file-size=10GB
spring.servlet.multipart.max-request-size=10GB
# Storage paths
file.upload-dir=/data/uploads
file.temp-dir=/tmp/uploads
# Database connection
# NOTE(review): plaintext credentials in a config file — move to environment
# variables or a secret manager before production.
spring.datasource.url=jdbc:mysql://localhost:3306/upload_db
spring.datasource.username=root
spring.datasource.password=123456
// Spring MVC multipart configuration.
@Configuration
public class WebConfig implements WebMvcConfigurer {
// Base directory for uploads, bound from the file.upload-dir property.
@Value("${file.upload-dir}")
private String uploadDir;
// Multipart temp-file location is pointed at the upload directory so the
// final move is a cheap same-filesystem rename.
// NOTE(review): size limits are not set here — they come from the
// spring.servlet.multipart.* properties; confirm the two places agree.
@Bean
public MultipartConfigElement multipartConfigElement() {
MultipartConfigFactory factory = new MultipartConfigFactory();
factory.setLocation(uploadDir);
return factory.createMultipartConfig();
}
}
<template>
<div>
<input type="file" @change="handleFileChange" />
<button @click="startUpload">开始上传</button>
<div>进度: {{progress}}%</div>
</div>
</template>
<script>
// Minimal chunked uploader component.
// Fix vs. original: onUploadProgress reported the progress of the CURRENT
// request only, so the bar reset to 0 on every chunk — it now reports
// cumulative progress over the whole file.
export default {
  data() {
    return {
      file: null,
      progress: 0,
      chunkSize: 10 * 1024 * 1024 // 10MB per chunk
    };
  },
  methods: {
    handleFileChange(e) {
      this.file = e.target.files[0];
    },
    async startUpload() {
      if (!this.file) return;
      const totalChunks = Math.ceil(this.file.size / this.chunkSize);
      for (let i = 0; i < totalChunks; i++) {
        const start = i * this.chunkSize;
        const chunk = this.file.slice(
          start,
          Math.min(start + this.chunkSize, this.file.size)
        );
        const formData = new FormData();
        formData.append('file', chunk);
        formData.append('chunkIndex', i);
        formData.append('totalChunks', totalChunks);
        await axios.post('/upload', formData, {
          onUploadProgress: e => {
            // Whole-file progress = bytes of finished chunks + bytes of
            // the in-flight request.
            const uploadedBytes = start + e.loaded;
            this.progress = Math.min(
              100,
              Math.round((uploadedBytes / this.file.size) * 100)
            );
          }
        });
      }
    }
  }
};
</script>
javascript复制// 根据网络状况动态调整分片大小
function getDynamicChunkSize() {
const connection = navigator.connection;
if (connection) {
switch (connection.effectiveType) {
case '4g': return 10 * 1024 * 1024; // 10MB
case '3g': return 5 * 1024 * 1024; // 5MB
default: return 1 * 1024 * 1024; // 1MB
}
}
return 5 * 1024 * 1024; // 默认5MB
}
java复制// 使用Redis缓存上传进度,提高查询效率
@Repository
public class RedisProgressRepository {
private final RedisTemplate<String, UploadProgress> redisTemplate;
public void saveProgress(UploadProgress progress) {
redisTemplate.opsForValue().set(
"upload:" + progress.getTaskId(),
progress,
1, TimeUnit.HOURS
);
}
}
// Recursively upload every file under a dropped directory.
// Fixes vs. original: DirectoryReader.readEntries is CALLBACK-based (the
// original spread a non-iterable), it must be called repeatedly until it
// returns an empty batch (browsers cap each batch at ~100 entries), and the
// recursive call passed a FileSystemEntry where a DataTransferItem was
// expected. The public entry point still accepts a DataTransferItem.
async function uploadFolder(folder) {
  // Drain a DirectoryReader into a single array of entries.
  function readAllEntries(reader) {
    return new Promise((resolve, reject) => {
      const collected = [];
      (function nextBatch() {
        reader.readEntries(batch => {
          if (batch.length === 0) return resolve(collected);
          collected.push(...batch);
          nextBatch();
        }, reject);
      })();
    });
  }
  // Depth-first walk over FileSystemEntry objects.
  async function walk(entry) {
    if (entry.isFile) {
      const file = await getFile(entry);
      await uploadFile(file, entry.fullPath);
    } else if (entry.isDirectory) {
      const children = await readAllEntries(entry.createReader());
      for (const child of children) {
        await walk(child);
      }
    }
  }
  await walk(folder.webkitGetAsEntry());
}
java复制public String saveWithPath(MultipartFile file, String relativePath) {
Path dest = Paths.get(uploadDir, relativePath);
Files.createDirectories(dest.getParent());
file.transferTo(dest);
return dest.toString();
}
javascript复制// 前端计算文件指纹
async function calculateFileFingerprint(file) {
const spark = new SparkMD5.ArrayBuffer();
const chunkSize = 2 * 1024 * 1024; // 2MB
const chunks = Math.ceil(file.size / chunkSize);
for (let i = 0; i < chunks; i++) {
const chunk = file.slice(i * chunkSize, (i + 1) * chunkSize);
const buffer = await chunk.arrayBuffer();
spark.append(buffer);
}
return spark.end();
}
// Instant-upload ("秒传") check: when a file with the same fingerprint and
// size is already stored, the client can skip uploading and reuse its URL.
@GetMapping("/check-exist")
public ResponseEntity<?> checkFileExist(
@RequestParam("fingerprint") String fingerprint,
@RequestParam("size") long size) {
// Fingerprint alone could collide; size narrows the match.
Optional<FileRecord> record = fileRepository.findByFingerprintAndSize(fingerprint, size);
if (record.isPresent()) {
// Known file: hand back the existing URL.
return ResponseEntity.ok().body(
Map.of("exists", true, "url", record.get().getUrl())
);
}
return ResponseEntity.ok().body(Map.of("exists", false));
}
# Accept request bodies up to 10G and stream them straight to the upstream
# instead of buffering to disk first — essential for large uploads.
client_max_body_size 10G;
proxy_request_buffering off;
proxy_buffering off;
-- Server-side progress table backing resumable uploads; one row per task.
-- MySQL dialect (ON UPDATE CURRENT_TIMESTAMP, inline INDEX definitions).
-- Mirrors the UploadProgress JPA entity.
CREATE TABLE upload_progress (
task_id VARCHAR(64) PRIMARY KEY,
file_name VARCHAR(255) NOT NULL,
file_path VARCHAR(512) NOT NULL,
file_size BIGINT NOT NULL,
chunk_size INT NOT NULL,
total_chunks INT NOT NULL,
uploaded_chunks INT NOT NULL,
status VARCHAR(20) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX idx_file_name (file_name),
INDEX idx_status (status)
);
// Tag every metric with the application name and deployment region so
// dashboards can filter per service / region.
// NOTE(review): System.getenv("REGION") may be null and Micrometer rejects
// null tag values — confirm REGION is always set in the environment.
@Bean
public MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
return registry -> registry.config().commonTags(
"application", "file-uploader",
"region", System.getenv("REGION")
);
}
// Records upload count and payload-size distribution via Micrometer.
// Fix vs. original: MeterRegistry.summary(...) returns DistributionSummary —
// Micrometer has no "Summary" type, so the original did not compile.
@RestController
public class UploadMetrics {
    private final Counter uploadCounter;                 // total uploads handled
    private final DistributionSummary uploadSizeSummary; // upload sizes in bytes

    public UploadMetrics(MeterRegistry registry) {
        uploadCounter = registry.counter("upload.count");
        uploadSizeSummary = registry.summary("upload.size");
    }

    // Record one upload and its payload size.
    @PostMapping("/upload")
    public void upload(@RequestParam("file") MultipartFile file) {
        uploadCounter.increment();
        uploadSizeSummary.record(file.getSize());
    }
}
java复制@Slf4j
@RestController
public class UploadController {
@PostMapping("/upload")
public void upload(@RequestParam("file") MultipartFile file) {
MDC.put("file", file.getOriginalFilename());
log.info("开始上传文件,大小: {}", file.getSize());
try {
// 上传逻辑
log.info("文件上传成功");
} catch (Exception e) {
log.error("文件上传失败", e);
throw e;
} finally {
MDC.clear();
}
}
}
<!-- JMeter load-test fragment: 50 concurrent threads ramped up over 10s,
     running for 300s, each POSTing a chunk (${TEST_FILE}) to /upload on
     ${SERVER_HOST}:${SERVER_PORT}. -->
<ThreadGroup guiclass="ThreadGroupGui" testclass="ThreadGroup" testname="文件上传测试">
<intProp name="ThreadGroup.num_threads">50</intProp>
<intProp name="ThreadGroup.ramp_time">10</intProp>
<longProp name="ThreadGroup.duration">300</longProp>
<!-- HTTP sampler performing the multipart upload request -->
<HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="上传分片">
<elementProp name="HTTPsampler.Arguments" elementType="Arguments">
<collectionProp name="Arguments.arguments">
<elementProp name="file" elementType="HTTPArgument">
<boolProp name="HTTPArgument.always_encode">false</boolProp>
<stringProp name="Argument.name">file</stringProp>
<stringProp name="Argument.value">${TEST_FILE}</stringProp>
<stringProp name="Argument.metadata">=</stringProp>
<boolProp name="HTTPArgument.use_equals">true</boolProp>
</elementProp>
</collectionProp>
</elementProp>
<stringProp name="HTTPSampler.domain">${SERVER_HOST}</stringProp>
<stringProp name="HTTPSampler.port">${SERVER_PORT}</stringProp>
<stringProp name="HTTPSampler.protocol">http</stringProp>
<stringProp name="HTTPSampler.path">/upload</stringProp>
<stringProp name="HTTPSampler.method">POST</stringProp>
</HTTPSamplerProxy>
</ThreadGroup>
| 方案 | 优点 | 缺点 | 适用场景 |
|---|---|---|---|
| 原生XHR | 兼容性好,无需额外依赖 | 代码量大,功能有限 | 简单上传需求 |
| Axios | Promise API,拦截器支持 | 需要额外处理进度事件 | 大多数现代应用 |
| Fetch API | 现代浏览器内置 | 不支持进度事件,需额外处理 | 简单REST调用 |
| WebSocket | 实时双向通信 | 服务器实现复杂 | 需要实时反馈的场景 |
在实际项目中实现大文件上传时,有几个关键点需要特别注意:
java复制// 每上传5个分片或每隔30秒保存一次进度
private boolean shouldSaveProgress(UploadTask task) {
return task.getChunkIndex() % 5 == 0 ||
System.currentTimeMillis() - task.getLastSaved() > 30_000;
}
// Retry a chunk upload up to maxRetries times with linear backoff
// (1s, 2s, 3s, ...); rethrows the final error when all attempts fail.
async function retryUpload(chunk, maxRetries = 3) {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await uploadChunk(chunk);
    } catch (err) {
      if (attempt === maxRetries) throw err;
      // Wait attempt * 1s before the next try.
      await new Promise(resolve => setTimeout(resolve, 1000 * attempt));
    }
  }
}
java复制@Scheduled(fixedRate = 24 * 60 * 60 * 1000) // 每天执行一次
public void cleanupTempFiles() {
File tempDir = new File(tempUploadDir);
File[] files = tempDir.listFiles(file ->
System.currentTimeMillis() - file.lastModified() > 7 * 24 * 60 * 60 * 1000
);
if (files != null) {
for (File file : files) {
file.delete();
}
}
}
javascript复制// 基于网络状况的动态分片
function getAdaptiveChunkSize(networkInfo) {
const baseSize = 1 * 1024 * 1024; // 1MB
const maxSize = 20 * 1024 * 1024; // 20MB
// 根据网络类型调整
let multiplier = 1;
if (networkInfo.effectiveType === '4g') multiplier = 5;
if (networkInfo.saveData) multiplier = 0.5;
// 根据实际带宽动态调整
if (networkInfo.downlink) {
multiplier *= Math.min(5, networkInfo.downlink / 10);
}
return Math.min(maxSize, baseSize * multiplier);
}
java复制public String blockchainNotarize(String fileHash) {
BlockchainClient client = new BlockchainClient();
String txHash = client.sendTransaction(fileHash);
return txHash;
}
java复制public void checkFileContent(File file) {
byte[] bytes = Files.readAllBytes(file.toPath());
AIClient client = new AIClient();
AICheckResult result = client.checkContent(bytes);
if (result.hasViolation()) {
throw new ContentViolationException(result.getReason());
}
}
code复制file-uploader/
├── frontend/ # 前端代码
│ ├── public/ # 静态资源
│ ├── src/
│ │ ├── components/ # Vue组件
│ │ │ └── Uploader.vue # 上传组件
│ │ ├── utils/
│ │ │ └── upload.js # 上传工具类
│ │ └── App.vue # 主组件
├── backend/ # 后端代码
│ ├── src/main/
│ │ ├── java/com/example/
│ │ │ ├── config/ # 配置类
│ │ │ ├── controller/ # 控制器
│ │ │ ├── model/ # 数据模型
│ │ │ ├── repository/ # 数据访问
│ │ │ ├── service/ # 业务逻辑
│ │ │ └── FileUploaderApplication.java
│ │ └── resources/
│ │ ├── application.properties
│ │ └── static/ # 静态资源
├── docs/ # 文档
│ ├── api.md # API文档
│ └── deploy.md # 部署指南
└── scripts/ # 脚本
├── deploy.sh # 部署脚本
└── test.sh # 测试脚本
javascript复制// 在浏览器控制台创建测试用大文件
function createTestFile(sizeMB) {
const size = sizeMB * 1024 * 1024;
const blob = new Blob([new ArrayBuffer(size)], {type: 'application/octet-stream'});
return new File([blob], `test_${sizeMB}MB.dat`);
}
java复制@Slf4j
@RestController
public class UploadController {
@PostMapping("/upload")
public void upload(@RequestParam("file") MultipartFile file,
@RequestParam("chunkIndex") int chunkIndex) {
MDC.put("chunk", String.valueOf(chunkIndex));
log.info("开始接收分片,大小: {}", file.getSize());
try {
// 处理逻辑
log.debug("分片处理完成");
} catch (Exception e) {
log.error("分片处理失败", e);
throw e;
} finally {
MDC.clear();
}
}
}
经过多个项目的实践验证,以下是大文件上传系统的最佳实践:
// Key operational metrics for the upload pipeline.
// NOTE(review): Micrometer gauges should wrap a LIVE object — e.g.
// registry.gauge(name, queue, Queue::size) — passing a snapshot int
// registers a constant value; verify these two gauge calls.
meterRegistry.gauge("upload.queue.size", uploadQueue.size());
meterRegistry.gauge("upload.active.threads", threadPool.getActiveCount());
meterRegistry.counter("upload.total.bytes").increment(bytes);
meterRegistry.timer("upload.time").record(duration);
对于不想从零实现的团队,可以考虑成熟的开源方案,如 tus(可恢复上传协议及其各语言实现)、Resumable.js、vue-simple-uploader,或对象存储自带的分片上传接口(如 S3 Multipart Upload、MinIO)。
移动端上传需要考虑额外因素:
javascript复制// 注册Service Worker处理后台上传
navigator.serviceWorker.register('/upload-sw.js').then(() => {
if ('BackgroundFetchManager' in self) {
navigator.serviceWorker.ready.then((sw) => {
sw.backgroundFetch.fetch('large-upload', [
new Request('/upload', {method: 'POST', body: formData})
], {
title: '文件上传中',
icons: [{src: '/icon.png', sizes: '72x72', type: 'image/png'}]
});
});
}
});
javascript复制// 监听网络变化
function handleNetworkChange() {
const connection = navigator.connection;
if (connection) {
connection.addEventListener('change', () => {
const newChunkSize = getDynamicChunkSize();
adjustUploadStrategy(newChunkSize);
});
}
}
java复制// Android端后台服务优化
public class UploadService extends JobIntentService {
@Override
protected void onHandleWork(@NonNull Intent intent) {
// 分批处理上传任务
// 根据电量状态调整策略
BatteryManager bm = (BatteryManager)getSystemService(BATTERY_SERVICE);
if (bm != null && !bm.isCharging()) {
// 省电模式:减小分片大小,降低频率
setUploadConfig(1 * 1024 * 1024, 1);
} else {
// 正常模式
setUploadConfig(5 * 1024 * 1024, 3);
}
}
}
javascript复制// 利用Node.js文件系统API
const fs = require('fs');
const path = require('path');
function uploadInElectron(filePath) {
const stats = fs.statSync(filePath);
const fileSize = stats.size;
const chunkSize = 10 * 1024 * 1024;
for (let i = 0; i < Math.ceil(fileSize / chunkSize); i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize, fileSize);
const readStream = fs.createReadStream(filePath, {start, end});
// 上传readStream
}
}
javascript复制// 使用react-native-fs处理文件
import RNFS from 'react-native-fs';
// Read a local file in 5MB base64 chunks via react-native-fs
// (RNFS.read(path, length, position, encoding)).
async function uploadInRN(filePath) {
  const info = await RNFS.stat(filePath);
  const CHUNK = 5 * 1024 * 1024;
  const total = Math.ceil(info.size / CHUNK);
  for (let i = 0; i < total; i++) {
    const start = i * CHUNK;
    const end = Math.min(start + CHUNK, info.size);
    const chunk = await RNFS.read(filePath, end - start, start, 'base64');
    // upload chunk here
  }
}
dart复制// 使用dart:io处理文件上传
import 'dart:io';
Future<void> uploadInFlutter(File file) async {
final fileSize = await file.length();
const chunkSize = 8 * 1024 * 1024;
final totalChunks = (fileSize / chunkSize).ceil();
for (var i = 0; i < totalChunks; i++) {
final start = i * chunkSize;
final end = min(start + chunkSize, fileSize);
final chunk = file.openRead(start, end);
// 上传chunk
}
}
通过Nginx的nginx-upload-module模块,可以直接处理分片上传:
nginx复制# nginx配置
server {
listen 80;
server_name upload.example.com;
upload_pass @backend;
upload_store /tmp/nginx_upload;
upload_store_access user:rw group:rw all:r;
upload_set_form_field $upload_field_name.name "$upload_file_name";
upload_set_form_field $upload_field_name.path "$upload_tmp_path";
upload_aggregate_form_field "$upload_field_name.md5" "$upload_file_md5";
upload_aggregate_form_field "$upload_field_name.size" "$upload_file_size";
upload_pass_form_field "^submit$|^description$";
upload_cleanup 400 404 499 500-505;
location @backend {
proxy_pass http://backend;
}
}
对于超大规模系统,可以考虑以下架构:
java复制// 上传服务定义
@RestController
@RequestMapping("/api/upload")
public class UploadController {
@Autowired
private TaskQueue taskQueue;
@PostMapping
public String handleUpload(@RequestParam MultipartFile file) {
String taskId = generateTaskId();
taskQueue.add(new UploadTask(taskId, file));
return taskId;
}
@GetMapping("/status/{taskId}")
public UploadStatus checkStatus(@PathVariable String taskId) {
return statusService.getStatus(taskId);
}
}