HDFS API的高階程式設計
HDFS 的 Java API 最核心的類別就兩個:FileSystem 和 Configuration
1、檔案的上傳和下載
1 package com.ghgj.hdfs.api; 2 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.fs.FileSystem; 5 import org.apache.hadoop.fs.Path; 6 7 public class HDFS_GET_AND_PUT { 8 9 public static void main(String[] args) throws Exception { 10 11 12 Configuration conf = new Configuration(); 13 conf.set("fs.defaultFS", "hdfs://hadoop1:9000"); 14 conf.set("dfs.replication", "2"); 15 FileSystem fs = FileSystem.get(conf); 16 17 18 /** 19 * 更改操作使用者有兩種方式: 20 * 21 * 1、直接設定執行換種的使用者名稱為hadoop 22 * 23 * VM arguments ; -DHADOOP_USER_NAME=hadoop 24 * 25 * 2、在程式碼中進行宣告 26 * 27 * System.setProperty("HADOOP_USER_NAME", "hadoop"); 28 */ 29 System.setProperty("HADOOP_USER_NAME", "hadoop"); 30 31 // 上傳 32 fs.copyFromLocalFile(new Path("c:/sss.txt"), new Path("/a/ggg.txt")); 33 34 35 36 /** 37 * .crc : 校驗檔案 38 * 39 * 每個塊的後設資料資訊都只會記錄合法資料的起始偏移量: qqq.txt blk_41838 : 0 - 1100byte 40 * 41 * 如果進行非法的資料追加。最終是能夠下載合法資料。 42 * 由於你在資料的中間, 也就是說在 0 -1100 之間的範圍進行了資料資訊的更改。 造成了採用CRC演算法計算出來校驗值,和最初存入進HDFS的校驗值 43 * 不一致。HDFS就認為當前這個檔案被損壞了。 44 */ 45 46 47 // 下載 48 fs.copyToLocalFile(new Path("/a/qqq.txt"), new Path("c:/qqq3.txt")); 49 50 51 /** 52 * 上傳和下載的API的底層封裝其實就是 : FileUtil.copy(....) 53 */ 54 55 fs.close(); 56 } 57 }
2、配置檔案conf
package com.exam.hdfs;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

/**
 * Shows where a {@link Configuration} gets its values from and prints every
 * effective name/value pair.
 */
public class TestConf1 {

    /**
     * Builds a configuration from defaults, an explicitly added resource and a
     * programmatic override, then dumps it to standard output.
     *
     * @param args unused
     * @throws Exception if the file system cannot be initialised
     */
    public static void main(String[] args) throws Exception {

        /*
         * Author's note: creating a Configuration loads a set of default files:
         *
         *   core-default.xml
         *   hdfs-default.xml
         *   mapred-default.xml
         *   yarn-default.xml
         */
        Configuration conf = new Configuration();
        // conf.addResource("hdfs-default.xml");

        /*
         * Author's note: in this project hdfs-site.xml sits under src, i.e. on
         * the classpath, so when the FileSystem is initialised its name/value
         * pairs are parsed into conf.
         *
         * However:
         *
         * 1. If hdfs-site.xml is NOT under src, is it loaded?            No.
         * 2. If the file is not named hdfs-default.xml or hdfs-site.xml,
         *    is it loaded automatically?                                  No.
         *
         * Conclusion: for configuration files to be picked up automatically
         * they must be named xxx-default.xml or xxx-site.xml AND be placed
         * under src. Files with any other name or location have to be loaded
         * by hand through the methods on the conf object.
         */
        // conf.addResource("hdfs-site.xml");
        conf.set("dfs.replication", "1");
        conf.addResource("myconfig/hdfs-site.xml");

        /*
         * Author's note: values are applied in this order, later sources
         * overriding earlier ones:
         *
         * 1. core/hdfs/mapred/yarn-default.xml
         * 2. resources added through conf.addResource(...)
         * 3. values set through conf.set(name, value)
         */

        // The FileSystem is obtained only for its side effect on conf (see the
        // note above); try-with-resources makes sure it is closed again.
        try (FileSystem fs = FileSystem.get(conf)) {

            System.out.println(conf.get("dfs.replication"));

            Iterator<Entry<String, String>> entries = conf.iterator();
            while (entries.hasNext()) {
                Entry<String, String> entry = entries.next();
                System.out.println(entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}
輸出結果
1 log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory). 2 log4j:WARN Please initialize the log4j system properly. 3 log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. 4 1 5 hadoop.security.groups.cache.secs 300 6 dfs.datanode.cache.revocation.timeout.ms 900000 7 dfs.namenode.resource.check.interval 5000 8 s3.client-write-packet-size 65536 9 dfs.client.https.need-auth false 10 dfs.replication 1 11 hadoop.security.group.mapping.ldap.directory.search.timeout 10000 12 dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold 10737418240 13 hadoop.work.around.non.threadsafe.getpwuid false 14 dfs.namenode.write-lock-reporting-threshold-ms 5000 15 fs.ftp.host.port 21 16 dfs.namenode.avoid.read.stale.datanode false 17 dfs.journalnode.rpc-address 0.0.0.0:8485 18 hadoop.security.kms.client.encrypted.key.cache.expiry 43200000 19 ipc.client.connection.maxidletime 10000 20 hadoop.registry.zk.session.timeout.ms 60000 21 tfile.io.chunk.size 1048576 22 fs.automatic.close true 23 ha.health-monitor.sleep-after-disconnect.ms 1000 24 io.map.index.interval 128 25 dfs.namenode.https-address 0.0.0.0:50470 26 dfs.mover.max-no-move-interval 60000 27 io.seqfile.sorter.recordlimit 1000000 28 fs.s3n.multipart.uploads.enabled false 29 hadoop.util.hash.type murmur 30 dfs.namenode.replication.min 1 31 dfs.datanode.directoryscan.threads 1 32 dfs.namenode.fs-limits.min-block-size 1048576 33 dfs.datanode.directoryscan.interval 21600 34 fs.AbstractFileSystem.file.impl org.apache.hadoop.fs.local.LocalFs 35 dfs.namenode.acls.enabled false 36 dfs.client.short.circuit.replica.stale.threshold.ms 1800000 37 net.topology.script.number.args 100 38 hadoop.http.authentication.token.validity 36000 39 fs.s3.block.size 67108864 40 dfs.namenode.resource.du.reserved 104857600 41 ha.failover-controller.graceful-fence.rpc-timeout.ms 5000 42 s3native.bytes-per-checksum 512 43 
dfs.namenode.datanode.registration.ip-hostname-check true 44 dfs.namenode.path.based.cache.block.map.allocation.percent 0.25 45 dfs.namenode.backup.http-address 0.0.0.0:50105 46 hadoop.security.group.mapping org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback 47 dfs.namenode.edits.noeditlogchannelflush false 48 dfs.datanode.cache.revocation.polling.ms 500 49 dfs.namenode.audit.loggers default 50 hadoop.security.groups.cache.warn.after.ms 5000 51 io.serializations org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization 52 dfs.namenode.lazypersist.file.scrub.interval.sec 300 53 fs.s3a.threads.core 15 54 hadoop.security.crypto.buffer.size 8192 55 hadoop.http.cross-origin.allowed-methods GET,POST,HEAD 56 hadoop.registry.zk.retry.interval.ms 1000 57 dfs.http.policy HTTP_ONLY 58 hadoop.registry.secure false 59 dfs.namenode.replication.interval 3 60 dfs.namenode.safemode.min.datanodes 0 61 dfs.client.file-block-storage-locations.num-threads 10 62 nfs.dump.dir /tmp/.hdfs-nfs 63 dfs.namenode.secondary.https-address 0.0.0.0:50091 64 hadoop.kerberos.kinit.command kinit 65 dfs.block.access.token.lifetime 600 66 dfs.webhdfs.enabled true 67 dfs.client.use.datanode.hostname false 68 dfs.namenode.delegation.token.max-lifetime 604800000 69 fs.trash.interval 0 70 dfs.datanode.drop.cache.behind.writes false 71 dfs.namenode.avoid.write.stale.datanode false 72 dfs.namenode.num.extra.edits.retained 1000000 73 s3.blocksize 67108864 74 ipc.client.connect.max.retries.on.timeouts 45 75 dfs.datanode.data.dir /home/hadoop/data/hadoopdata/data 76 fs.s3.buffer.dir ${hadoop.tmp.dir}/s3 77 fs.s3n.block.size 67108864 78 nfs.exports.allowed.hosts * rw 79 ha.health-monitor.connect-retry-interval.ms 1000 80 hadoop.security.instrumentation.requires.admin false 81 hadoop.registry.zk.retry.ceiling.ms 60000 82 nfs.rtmax 1048576 83 dfs.client.mmap.cache.size 256 84 
dfs.datanode.data.dir.perm 700 85 io.file.buffer.size 4096 86 dfs.namenode.backup.address 0.0.0.0:50100 87 dfs.client.datanode-restart.timeout 30 88 dfs.datanode.readahead.bytes 4194304 89 dfs.namenode.xattrs.enabled true 90 io.mapfile.bloom.size 1048576 91 ipc.client.connect.retry.interval 1000 92 dfs.client-write-packet-size 65536 93 dfs.namenode.checkpoint.txns 1000000 94 dfs.datanode.bp-ready.timeout 20 95 dfs.datanode.transfer.socket.send.buffer.size 131072 96 hadoop.security.kms.client.authentication.retry-count 1 97 dfs.client.block.write.retries 3 98 fs.swift.impl org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem 99 ha.failover-controller.graceful-fence.connection.retries 1 100 hadoop.registry.zk.connection.timeout.ms 15000 101 dfs.namenode.safemode.threshold-pct 0.999f 102 dfs.cachereport.intervalMsec 10000 103 hadoop.security.java.secure.random.algorithm SHA1PRNG 104 ftp.blocksize 67108864 105 dfs.namenode.list.cache.directives.num.responses 100 106 dfs.namenode.kerberos.principal.pattern * 107 file.stream-buffer-size 4096 108 dfs.datanode.dns.nameserver default 109 fs.s3a.max.total.tasks 1000 110 dfs.namenode.replication.considerLoad true 111 nfs.allow.insecure.ports true 112 dfs.namenode.edits.journal-plugin.qjournal org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager 113 dfs.client.write.exclude.nodes.cache.expiry.interval.millis 600000 114 dfs.client.mmap.cache.timeout.ms 3600000 115 ipc.client.idlethreshold 4000 116 io.skip.checksum.errors false 117 ftp.stream-buffer-size 4096 118 fs.s3a.fast.upload false 119 dfs.client.failover.connection.retries.on.timeouts 0 120 file.blocksize 67108864 121 ftp.replication 3 122 dfs.namenode.replication.work.multiplier.per.iteration 2 123 hadoop.security.authorization false 124 hadoop.http.authentication.simple.anonymous.allowed true 125 s3native.client-write-packet-size 65536 126 hadoop.rpc.socket.factory.class.default org.apache.hadoop.net.StandardSocketFactory 127 file.bytes-per-checksum 512 128 
dfs.datanode.slow.io.warning.threshold.ms 300 129 fs.har.impl.disable.cache true 130 rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB org.apache.hadoop.ipc.ProtobufRpcEngine 131 io.seqfile.lazydecompress true 132 dfs.namenode.reject-unresolved-dn-topology-mapping false 133 hadoop.common.configuration.version 0.23.0 134 hadoop.security.authentication simple 135 dfs.datanode.drop.cache.behind.reads false 136 dfs.image.compression.codec org.apache.hadoop.io.compress.DefaultCodec 137 dfs.client.read.shortcircuit.streams.cache.size 256 138 file.replication 1 139 dfs.namenode.top.num.users 10 140 dfs.namenode.accesstime.precision 3600000 141 dfs.namenode.fs-limits.max-xattrs-per-inode 32 142 dfs.image.transfer.timeout 60000 143 io.mapfile.bloom.error.rate 0.005 144 nfs.wtmax 1048576 145 hadoop.security.kms.client.encrypted.key.cache.size 500 146 dfs.namenode.edit.log.autoroll.check.interval.ms 300000 147 fs.s3a.multipart.purge false 148 dfs.namenode.support.allow.format true 149 hadoop.hdfs.configuration.version 1 150 fs.s3a.connection.establish.timeout 5000 151 hadoop.security.group.mapping.ldap.search.attr.member member 152 dfs.secondary.namenode.kerberos.internal.spnego.principal ${dfs.web.authentication.kerberos.principal} 153 dfs.stream-buffer-size 4096 154 hadoop.ssl.client.conf ssl-client.xml 155 dfs.namenode.invalidate.work.pct.per.iteration 0.32f 156 fs.s3a.multipart.purge.age 86400 157 dfs.journalnode.https-address 0.0.0.0:8481 158 dfs.namenode.top.enabled true 159 hadoop.security.kms.client.encrypted.key.cache.low-watermark 0.3f 160 dfs.namenode.max.objects 0 161 hadoop.user.group.static.mapping.overrides dr.who=; 162 fs.s3a.fast.buffer.size 1048576 163 dfs.bytes-per-checksum 512 164 dfs.datanode.max.transfer.threads 4096 165 dfs.block.access.key.update.interval 600 166 ipc.maximum.data.length 67108864 167 tfile.fs.input.buffer.size 262144 168 ha.failover-controller.new-active.rpc-timeout.ms 60000 169 dfs.client.cached.conn.retry 3 170 
dfs.client.read.shortcircuit false 171 hadoop.ssl.hostname.verifier DEFAULT 172 dfs.datanode.hdfs-blocks-metadata.enabled false 173 dfs.datanode.directoryscan.throttle.limit.ms.per.sec 0 174 dfs.image.transfer.chunksize 65536 175 hadoop.http.authentication.type simple 176 dfs.namenode.list.encryption.zones.num.responses 100 177 dfs.client.https.keystore.resource ssl-client.xml 178 s3native.blocksize 67108864 179 net.topology.impl org.apache.hadoop.net.NetworkTopology 180 dfs.client.failover.sleep.base.millis 500 181 io.seqfile.compress.blocksize 1000000 182 dfs.namenode.path.based.cache.refresh.interval.ms 30000 183 dfs.namenode.decommission.interval 30 184 dfs.permissions.superusergroup supergroup 185 dfs.namenode.fs-limits.max-directory-items 1048576 186 hadoop.registry.zk.retry.times 5 187 dfs.ha.log-roll.period 120 188 fs.AbstractFileSystem.ftp.impl org.apache.hadoop.fs.ftp.FtpFs 189 ftp.bytes-per-checksum 512 190 dfs.user.home.dir.prefix /user 191 dfs.namenode.checkpoint.edits.dir ${dfs.namenode.checkpoint.dir} 192 dfs.client.socket.send.buffer.size 131072 193 ipc.client.fallback-to-simple-auth-allowed false 194 dfs.blockreport.initialDelay 0 195 dfs.namenode.inotify.max.events.per.rpc 1000 196 dfs.namenode.heartbeat.recheck-interval 300000 197 dfs.namenode.safemode.extension 30000 198 dfs.client.failover.sleep.max.millis 15000 199 dfs.namenode.delegation.key.update-interval 86400000 200 dfs.datanode.transfer.socket.recv.buffer.size 131072 201 hadoop.rpc.protection authentication 202 fs.permissions.umask-mode 022 203 fs.s3.sleepTimeSeconds 10 204 dfs.namenode.fs-limits.max-xattr-size 16384 205 ha.health-monitor.rpc-timeout.ms 45000 206 hadoop.http.staticuser.user dr.who 207 dfs.datanode.http.address 0.0.0.0:50075 208 fs.s3a.connection.maximum 15 209 fs.s3a.paging.maximum 5000 210 fs.AbstractFileSystem.viewfs.impl org.apache.hadoop.fs.viewfs.ViewFs 211 dfs.namenode.blocks.per.postponedblocks.rescan 10000 212 fs.ftp.host 0.0.0.0 213 
dfs.lock.suppress.warning.interval 10s 214 hadoop.http.authentication.kerberos.keytab ${user.home}/hadoop.keytab 215 fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem 216 hadoop.registry.zk.root /registry 217 hadoop.jetty.logs.serve.aliases true 218 dfs.namenode.fs-limits.max-blocks-per-file 1048576 219 dfs.balancer.keytab.enabled false 220 dfs.client.block.write.replace-datanode-on-failure.enable true 221 hadoop.http.cross-origin.max-age 1800 222 io.compression.codec.bzip2.library system-native 223 dfs.namenode.checkpoint.dir file://${hadoop.tmp.dir}/dfs/namesecondary 224 dfs.client.use.legacy.blockreader.local false 225 dfs.namenode.top.windows.minutes 1,5,25 226 ipc.ping.interval 60000 227 net.topology.node.switch.mapping.impl org.apache.hadoop.net.ScriptBasedMapping 228 nfs.mountd.port 4242 229 dfs.storage.policy.enabled true 230 dfs.namenode.list.cache.pools.num.responses 100 231 fs.df.interval 60000 232 nfs.server.port 2049 233 ha.zookeeper.parent-znode /hadoop-ha 234 hadoop.http.cross-origin.allowed-headers X-Requested-With,Content-Type,Accept,Origin 235 dfs.datanode.block-pinning.enabled false 236 dfs.namenode.num.checkpoints.retained 2 237 fs.s3a.attempts.maximum 10 238 s3native.stream-buffer-size 4096 239 io.seqfile.local.dir ${hadoop.tmp.dir}/io/local 240 fs.s3n.multipart.copy.block.size 5368709120 241 dfs.encrypt.data.transfer.cipher.key.bitlength 128 242 dfs.client.mmap.retry.timeout.ms 300000 243 dfs.datanode.sync.behind.writes false 244 dfs.namenode.fslock.fair true 245 hadoop.ssl.keystores.factory.class org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 246 dfs.permissions.enabled true 247 fs.AbstractFileSystem.hdfs.impl org.apache.hadoop.fs.Hdfs 248 dfs.blockreport.split.threshold 1000000 249 dfs.datanode.balance.bandwidthPerSec 1048576 250 dfs.block.scanner.volume.bytes.per.second 1048576 251 hadoop.security.random.device.file.path /dev/urandom 252 fs.s3.maxRetries 4 253 hadoop.http.filter.initializers 
org.apache.hadoop.http.lib.StaticUserWebFilter 254 dfs.namenode.stale.datanode.interval 30000 255 ipc.client.rpc-timeout.ms 0 256 fs.client.resolve.remote.symlinks true 257 dfs.default.chunk.view.size 32768 258 hadoop.ssl.enabled.protocols TLSv1 259 dfs.namenode.decommission.blocks.per.interval 500000 260 dfs.namenode.handler.count 10 261 dfs.image.transfer.bandwidthPerSec 0 262 rpc.metrics.quantile.enable false 263 hadoop.ssl.enabled false 264 dfs.replication.max 512 265 dfs.namenode.name.dir /home/hadoop/data/hadoopdata/name 266 dfs.namenode.read-lock-reporting-threshold-ms 5000 267 dfs.datanode.https.address 0.0.0.0:50475 268 dfs.datanode.failed.volumes.tolerated 0 269 ipc.client.kill.max 10 270 fs.s3a.threads.max 256 271 ipc.server.listen.queue.size 128 272 dfs.client.domain.socket.data.traffic false 273 dfs.block.access.token.enable false 274 dfs.blocksize 134217728 275 fs.s3a.connection.timeout 50000 276 fs.s3a.threads.keepalivetime 60 277 file.client-write-packet-size 65536 278 dfs.datanode.address 0.0.0.0:50010 279 ha.failover-controller.cli-check.rpc-timeout.ms 20000 280 ha.zookeeper.acl world:anyone:rwcda 281 ipc.client.connect.max.retries 10 282 dfs.encrypt.data.transfer false 283 dfs.namenode.write.stale.datanode.ratio 0.5f 284 ipc.client.ping true 285 dfs.datanode.shared.file.descriptor.paths /dev/shm,/tmp 286 dfs.short.circuit.shared.memory.watcher.interrupt.check.ms 60000 287 hadoop.tmp.dir /home/hadoop/data/hadoopdata 288 dfs.datanode.handler.count 10 289 dfs.client.failover.max.attempts 15 290 dfs.balancer.max-no-move-interval 60000 291 dfs.client.read.shortcircuit.streams.cache.expiry.ms 300000 292 dfs.namenode.block-placement-policy.default.prefer-local-node true 293 hadoop.ssl.require.client.cert false 294 hadoop.security.uid.cache.secs 14400 295 dfs.client.read.shortcircuit.skip.checksum false 296 dfs.namenode.resource.checked.volumes.minimum 1 297 hadoop.registry.rm.enabled false 298 dfs.namenode.quota.init-threads 4 299 
dfs.namenode.max.extra.edits.segments.retained 10000 300 dfs.webhdfs.user.provider.user.pattern ^[A-Za-z_][A-Za-z0-9._-]*[$]?$ 301 dfs.client.mmap.enabled true 302 dfs.client.file-block-storage-locations.timeout.millis 1000 303 dfs.datanode.block.id.layout.upgrade.threads 12 304 dfs.datanode.use.datanode.hostname false 305 hadoop.fuse.timer.period 5 306 dfs.client.context default 307 fs.trash.checkpoint.interval 0 308 dfs.journalnode.http-address 0.0.0.0:8480 309 dfs.balancer.address 0.0.0.0:0 310 dfs.namenode.lock.detailed-metrics.enabled false 311 dfs.namenode.delegation.token.renew-interval 86400000 312 ha.health-monitor.check-interval.ms 1000 313 dfs.namenode.retrycache.heap.percent 0.03f 314 ipc.client.connect.timeout 20000 315 dfs.reformat.disabled false 316 dfs.blockreport.intervalMsec 21600000 317 fs.s3a.multipart.threshold 2147483647 318 dfs.https.server.keystore.resource ssl-server.xml 319 hadoop.http.cross-origin.enabled false 320 io.map.index.skip 0 321 dfs.balancer.block-move.timeout 0 322 io.native.lib.available true 323 s3.replication 3 324 dfs.namenode.kerberos.internal.spnego.principal ${dfs.web.authentication.kerberos.principal} 325 fs.AbstractFileSystem.har.impl org.apache.hadoop.fs.HarFs 326 hadoop.security.kms.client.encrypted.key.cache.num.refill.threads 2 327 fs.s3n.multipart.uploads.block.size 67108864 328 dfs.image.compress false 329 dfs.datanode.dns.interface default 330 dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction 0.75f 331 tfile.fs.output.buffer.size 262144 332 fs.du.interval 600000 333 dfs.client.failover.connection.retries 0 334 dfs.namenode.edit.log.autoroll.multiplier.threshold 2.0 335 hadoop.security.group.mapping.ldap.ssl false 336 dfs.namenode.top.window.num.buckets 10 337 fs.s3a.buffer.dir ${hadoop.tmp.dir}/s3a 338 dfs.namenode.checkpoint.check.period 60 339 fs.defaultFS hdfs://hadoop1:9000 340 fs.s3a.multipart.size 104857600 341 dfs.client.slow.io.warning.threshold.ms 30000 342 
dfs.datanode.max.locked.memory 0 343 dfs.namenode.retrycache.expirytime.millis 600000 344 hadoop.security.group.mapping.ldap.search.attr.group.name cn 345 dfs.client.block.write.replace-datanode-on-failure.best-effort false 346 dfs.ha.fencing.ssh.connect-timeout 30000 347 dfs.datanode.scan.period.hours 504 348 hadoop.registry.zk.quorum localhost:2181 349 dfs.namenode.fs-limits.max-component-length 255 350 hadoop.http.cross-origin.allowed-origins * 351 dfs.namenode.enable.retrycache true 352 dfs.datanode.du.reserved 0 353 dfs.datanode.ipc.address 0.0.0.0:50020 354 hadoop.registry.system.acls sasl:yarn@, sasl:mapred@, sasl:hdfs@ 355 dfs.namenode.path.based.cache.retry.interval.ms 30000 356 hadoop.security.crypto.cipher.suite AES/CTR/NoPadding 357 dfs.client.block.write.replace-datanode-on-failure.policy DEFAULT 358 dfs.namenode.http-address 0.0.0.0:50070 359 hadoop.security.crypto.codec.classes.aes.ctr.nopadding org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec,org.apache.hadoop.crypto.JceAesCtrCryptoCodec 360 dfs.ha.tail-edits.period 60 361 hadoop.security.groups.negative-cache.secs 30 362 hadoop.ssl.server.conf ssl-server.xml 363 hadoop.registry.jaas.context Client 364 s3native.replication 3 365 hadoop.security.group.mapping.ldap.search.filter.group (objectClass=group) 366 hadoop.http.authentication.kerberos.principal HTTP/_HOST@LOCALHOST 367 dfs.namenode.startup.delay.block.deletion.sec 0 368 hadoop.security.group.mapping.ldap.search.filter.user (&(objectClass=user)(sAMAccountName={0})) 369 dfs.namenode.edits.dir ${dfs.namenode.name.dir} 370 dfs.namenode.checkpoint.max-retries 3 371 s3.stream-buffer-size 4096 372 ftp.client-write-packet-size 65536 373 dfs.datanode.fsdatasetcache.max.threads.per.volume 4 374 hadoop.security.sensitive-config-keys password$,fs.s3.*[Ss]ecret.?[Kk]ey,fs.azure.account.key.*,dfs.webhdfs.oauth2.[a-z]+.token,hadoop.security.sensitive-config-keys 375 dfs.namenode.decommission.max.concurrent.tracked.nodes 100 376 
dfs.namenode.name.dir.restore false 377 ipc.server.log.slow.rpc false 378 dfs.heartbeat.interval 3 379 dfs.namenode.secondary.http-address hadoop3:50090 380 ha.zookeeper.session-timeout.ms 5000 381 s3.bytes-per-checksum 512 382 fs.s3a.connection.ssl.enabled true 383 hadoop.http.authentication.signature.secret.file ${user.home}/hadoop-http-auth-signature-secret 384 hadoop.fuse.connection.timeout 300 385 dfs.namenode.checkpoint.period 3600 386 ipc.server.max.connections 0 387 dfs.ha.automatic-failover.enabled false
3、列出指定目錄下的檔案以及塊的資訊
1 package com.exam.hdfs; 2 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.fs.BlockLocation; 5 import org.apache.hadoop.fs.FileSystem; 6 import org.apache.hadoop.fs.LocatedFileStatus; 7 import org.apache.hadoop.fs.Path; 8 import org.apache.hadoop.fs.RemoteIterator; 9 10 public class TestHDFS1 { 11 12 public static void main(String[] args) throws Exception { 13 14 Configuration conf = new Configuration(); 15 System.setProperty("HADOOP_USER_NAME", "hadoop"); 16 conf.set("fs.defaultFS", "hdfs://hadoop1:9000"); 17 FileSystem fs = FileSystem.get(conf); 18 19 /** 20 * 列出指定的目錄下的所有檔案 21 */ 22 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true); 23 while(listFiles.hasNext()){ 24 LocatedFileStatus file = listFiles.next(); 25 26 27 System.out.println(file.getPath()+"\t"); 28 System.out.println(file.getPath().getName()+"\t"); 29 System.out.println(file.getLen()+"\t"); 30 System.out.println(file.getReplication()+"\t"); 31 32 /** 33 * blockLocations的長度是幾? 是什麼意義? 34 * 35 * 塊的數量 36 */ 37 BlockLocation[] blockLocations = file.getBlockLocations(); 38 System.out.println(blockLocations.length+"\t"); 39 40 for(BlockLocation bl : blockLocations){ 41 String[] hosts = bl.getHosts(); 42 43 System.out.print(hosts[0] + "-" + hosts[1]+"\t"); 44 } 45 System.out.println(); 46 47 } 48 49 50 } 51 }
輸出結果
1 hdfs://hadoop1:9000/aa/bb/cc/hadoop.tar.gz 2 hadoop.tar.gz 3 199007110 4 2 5 3 6 hadoop3-hadoop1 hadoop1-hadoop2 hadoop1-hadoop4
4、上傳檔案
package com.exam.hdfs;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Uploads a local file to HDFS by copying between raw streams.
 */
public class UploadDataByStream {

    /**
     * Copies {@code d:/abc.tar.gz} to {@code /aa/abc.tar.gz} on the cluster.
     *
     * @param args unused
     * @throws Exception if either stream cannot be opened or the copy fails
     */
    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");

        // All three resources are closed in reverse order even if opening a
        // later one (or the copy itself) throws, so nothing is leaked.
        try (FileSystem fs = FileSystem.get(conf);
             InputStream in = new FileInputStream(new File("d:/abc.tar.gz"));
             FSDataOutputStream out = fs.create(new Path("/aa/abc.tar.gz"))) {

            // close = false: the try-with-resources block owns the streams.
            IOUtils.copyBytes(in, out, 4096, false);
        }
    }
}
5、下載檔案
package com.exam.hdfs;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Downloads a file from HDFS by copying between raw streams.
 */
public class DownloadDataByStream {

    /**
     * Copies {@code /aa/abc.tar.gz} from the cluster to {@code D:/abc.sh}.
     *
     * @param args unused
     * @throws Exception if either stream cannot be opened or the copy fails
     */
    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        conf.set("fs.defaultFS", "hdfs://hadoop1:9000");

        // All three resources are closed in reverse order even if opening a
        // later one (or the copy itself) throws, so nothing is leaked.
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(new Path("/aa/abc.tar.gz"));
             OutputStream out = new FileOutputStream(new File("D:/abc.sh"))) {

            // close = false: the try-with-resources block owns the streams.
            IOUtils.copyBytes(in, out, 4096, false);
        }
    }
}
6、刪除某個路徑下特定型別的檔案,比如class型別檔案,比如txt型別檔案
package com.exam.hdfs;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Deletes every file whose name ends with {@link #FILETYPE} underneath
 * {@link #DELETE_PATH}, descending into sub-directories.
 */
public class HDFS_DELETE_CLASS {

    /** File-name suffix that marks a file for deletion. */
    public static final String FILETYPE = "tar.gz";

    /** Directory (or single file) the clean-up starts from. */
    public static final String DELETE_PATH = "/aa";

    public static void main(String[] args) throws Exception {
        new HDFS_DELETE_CLASS().rmrClassFile(new Path(DELETE_PATH));
    }

    /**
     * Connects to the cluster and removes matching files at or below {@code path}.
     *
     * @param path a file or directory on HDFS
     * @throws Exception if the cluster cannot be reached or {@code path} does not exist
     */
    public void rmrClassFile(Path path) throws Exception {

        // Obtain the FileSystem for the cluster, acting as user "hadoop";
        // try-with-resources closes it once the traversal is finished.
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(new URI("hdfs://hadoop1:9000"), conf, "hadoop")) {

            // First find out whether path itself is a directory or a file.
            FileStatus fileStatus = fs.getFileStatus(path);

            if (fileStatus.isDirectory()) {
                checkAndDeleteDirectory(path, fs);
            } else {
                // A plain file: delete it only if its name matches FILETYPE.
                checkAndDeleteFile(path, fs);
            }
        }
    }

    /**
     * Walks one directory level: files are checked for deletion, everything
     * else is descended into recursively.
     *
     * @param path directory to process
     * @param fs   file system the directory lives on
     * @throws Exception if listing or deleting fails
     */
    public static void checkAndDeleteDirectory(Path path, FileSystem fs) throws Exception {
        // Status of the direct children (files and sub-directories) of path.
        FileStatus[] listStatus = fs.listStatus(path);
        for (FileStatus fStatus : listStatus) {
            Path p = fStatus.getPath();
            if (fStatus.isFile()) {
                checkAndDeleteFile(p, fs);
            } else {
                checkAndDeleteDirectory(p, fs);
            }
        }
    }

    /**
     * Prints the file name and deletes the file when the name ends with
     * {@link #FILETYPE}; other files are left untouched.
     *
     * @param path file to inspect
     * @param fs   file system the file lives on
     * @throws Exception if the delete fails
     */
    public static void checkAndDeleteFile(Path path, FileSystem fs) throws Exception {
        String name = path.getName();
        System.out.println(name);

        // A "contains" test would also hit names that merely include FILETYPE
        // somewhere in the middle, so only the suffix is compared. endsWith is
        // also safe for names shorter than FILETYPE, where a manual
        // substring(length - FILETYPE.length()) would throw.
        if (name.endsWith(FILETYPE)) {
            fs.delete(path, true);
        }
    }
}
7、刪除HDFS叢集中的所有空檔案和空目錄
1 public class DeleteEmptyDirAndFile { 2 3 static FileSystem fs = null; 4 5 public static void main(String[] args) throws Exception { 6 7 initFileSystem(); 8 9 // 建立測試資料 10 // makeTestData(); 11 12 // 刪除測試資料 13 // deleteTestData(); 14 15 // 刪除指定資料夾下的空檔案和空資料夾 16 deleteEmptyDirAndFile(new Path("/aa")); 17 } 18 19 /** 20 * 刪除指定資料夾下的 空檔案 和 空資料夾 21 * @throws Exception 22 */ 23 public static void deleteEmptyDirAndFile(Path path) throws Exception { 24 25 //當是空資料夾時 26 FileStatus[] listStatus = fs.listStatus(path); 27 if(listStatus.length == 0){ 28 fs.delete(path, true); 29 return; 30 } 31 32 // 該方法的結果:包括指定目錄的 檔案 和 資料夾 33 RemoteIterator<LocatedFileStatus> listLocatedStatus = fs.listLocatedStatus(path); 34 35 while (listLocatedStatus.hasNext()) { 36 LocatedFileStatus next = listLocatedStatus.next(); 37 38 Path currentPath = next.getPath(); 39 // 獲取父目錄 40 Path parent = next.getPath().getParent(); 41 42 // 如果是資料夾,繼續往下遍歷,刪除符合條件的檔案(空資料夾) 43 if (next.isDirectory()) { 44 45 // 如果是空資料夾 46 if(fs.listStatus(currentPath).length == 0){ 47 // 刪除掉 48 fs.delete(currentPath, true); 49 }else{ 50 // 不是空資料夾,那麼則繼續遍歷 51 if(fs.exists(currentPath)){ 52 deleteEmptyDirAndFile(currentPath); 53 } 54 } 55 56 // 如果是檔案 57 } else { 58 // 獲取檔案的長度 59 long fileLength = next.getLen(); 60 // 當檔案是空檔案時, 刪除 61 if(fileLength == 0){ 62 fs.delete(currentPath, true); 63 } 64 } 65 66 // 當空資料夾或者空檔案刪除時,有可能導致父資料夾為空資料夾, 67 // 所以每次刪除一個空檔案或者空檔案的時候都需要判斷一下,如果真是如此,那麼就需要把該資料夾也刪除掉 68 int length = fs.listStatus(parent).length; 69 if(length == 0){ 70 fs.delete(parent, true); 71 } 72 } 73 } 74 75 /** 76 * 初始化FileSystem物件之用 77 */ 78 public static void initFileSystem() throws Exception{ 79 Configuration conf = new Configuration(); 80 System.setProperty("HADOOP_USER_NAME", "hadoop"); 81 conf.addResource("config/core-site.xml"); 82 conf.addResource("config/hdfs-site.xml"); 83 fs = FileSystem.get(conf); 84 } 85 86 /** 87 * 建立 測試 資料之用 88 */ 89 public static void makeTestData() throws Exception { 90 91 String emptyFilePath = 
"D:\\bigdata\\1704mr_test\\empty.txt"; 92 String notEmptyFilePath = "D:\\bigdata\\1704mr_test\\notEmpty.txt"; 93 94 // 空資料夾 和 空檔案 的目錄 95 String path1 = "/aa/bb1/cc1/dd1/"; 96 fs.mkdirs(new Path(path1)); 97 fs.mkdirs(new Path("/aa/bb1/cc1/dd2/")); 98 fs.copyFromLocalFile(new Path(emptyFilePath), new Path(path1)); 99 fs.copyFromLocalFile(new Path(notEmptyFilePath), new Path(path1)); 100 101 // 空檔案 的目錄 102 String path2 = "/aa/bb1/cc2/dd2/"; 103 fs.mkdirs(new Path(path2)); 104 fs.copyFromLocalFile(new Path(emptyFilePath), new Path(path2)); 105 106 // 非空檔案 的目錄 107 String path3 = "/aa/bb2/cc3/dd3"; 108 fs.mkdirs(new Path(path3)); 109 fs.copyFromLocalFile(new Path(notEmptyFilePath), new Path(path3)); 110 111 // 空 資料夾 112 String path4 = "/aa/bb2/cc4/dd4"; 113 fs.mkdirs(new Path(path4)); 114 115 System.out.println("測試資料建立成功"); 116 } 117 118 /** 119 * 刪除 指定資料夾 120 * @throws Exception 121 */ 122 public static void deleteTestData() throws Exception { 123 boolean delete = fs.delete(new Path("/aa"), true); 124 System.out.println(delete ? "刪除資料成功" : "刪除資料失敗"); 125 } 126 127 }
8、手動拷貝某個特定的資料塊(比如某個檔案的第二個資料塊)
1 /** 2 * 手動拷貝某個特定的資料塊(比如某個檔案的第二個資料塊) 3 * */ 4 public static void copyBlock(String str,int num) { 5 6 Path path = new Path(str); 7 8 BlockLocation[] localtions = new BlockLocation[0] ; 9 10 try { 11 FileStatus fileStatus = fs.getFileStatus(path); 12 13 localtions = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen()); 14 15 /*for(int i=0;i<localtions.length;i++) { 16 //0,134217728,hadoop1,hadoop3 17 //134217728,64789382,hadoop3,hadoop1 18 System.out.println(localtions[i]); 19 }*/ 20 21 /*System.out.println(localtions[num-1].getOffset()); 22 System.out.println(localtions[num-1].getLength()); 23 String[] hosts = localtions[num-1].getHosts();*/ 24 25 FSDataInputStream open = fs.open(path); 26 open.seek(localtions[num-1].getOffset()); 27 OutputStream out = new FileOutputStream(new File("D:/abc.tar.gz")); 28 IOUtils.copyBytes(open, out,4096,true); 29 30 31 32 } catch (IOException e) { 33 e.printStackTrace(); 34 } 35 36 }
9、編寫程式統計出HDFS檔案系統中檔案大小小於HDFS叢集中的預設塊大小的檔案佔比
1 import org.apache.hadoop.conf.Configuration; 2 import org.apache.hadoop.fs.FileSystem; 3 import org.apache.hadoop.fs.LocatedFileStatus; 4 import org.apache.hadoop.fs.Path; 5 import org.apache.hadoop.fs.RemoteIterator; 6 7 /** 8 * 9 * 編寫程式統計出HDFS檔案系統中檔案大小小於HDFS叢集中的預設塊大小的檔案佔比 10 * 比如:大於等於128M的檔案個數為98,小於128M的檔案總數為2,所以答案是2% 11 */ 12 public class Exam1_SmallFilePercent { 13 14 private static int DEFAULT_BLOCKSIZE = 128 * 1024 * 1024; 15 16 public static void main(String[] args) throws Exception { 17 18 19 Configuration conf = new Configuration(); 20 conf.set("fs.defaultFS", "hdfs://hadoop1:9000"); 21 System.setProperty("HADOOP_USER_NAME", "hadoop"); 22 FileSystem fs = FileSystem.get(conf); 23 24 25 Path path = new Path("/"); 26 float smallFilePercent = getSmallFilePercent(fs, path); 27 System.out.println(smallFilePercent); 28 29 30 fs.close(); 31 } 32 33 /** 34 * 該方法求出指定目錄下的小檔案和總檔案數的對比 35 * @throws Exception 36 */ 37 private static float getSmallFilePercent(FileSystem fs, Path path) throws Exception { 38 // TODO Auto-generated method stub 39 40 int smallFile = 0; 41 int totalFile = 0; 42 43 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, false); 44 while(listFiles.hasNext()){ 45 totalFile++; 46 LocatedFileStatus next = listFiles.next(); 47 long len = next.getLen(); 48 if(len < DEFAULT_BLOCKSIZE){ 49 smallFile++; 50 } 51 } 52 System.out.println(smallFile+" : "+totalFile); 53 54 return smallFile * 1f /totalFile; 55 } 56 57 }
10、編寫程式統計出HDFS檔案系統中的平均資料塊數(資料塊總數/檔案總數)
1 import org.apache.hadoop.conf.Configuration; 2 import org.apache.hadoop.fs.FileSystem; 3 import org.apache.hadoop.fs.LocatedFileStatus; 4 import org.apache.hadoop.fs.Path; 5 import org.apache.hadoop.fs.RemoteIterator; 6 7 /** 8 * 9 * 編寫程式統計出HDFS檔案系統中的平均資料塊數(資料塊總數/檔案總數) 10 * 比如:一個檔案有5個塊,一個檔案有3個塊,那麼平均資料塊數為4 11 * 如果還有一個檔案,並且資料塊就1個,那麼整個HDFS的平均資料塊數就是3 12 */ 13 public class Exam2_HDSFAvgBlocks { 14 15 public static void main(String[] args) throws Exception { 16 17 18 Configuration conf = new Configuration(); 19 conf.set("fs.defaultFS", "hdfs://hadoop1:9000"); 20 System.setProperty("HADOOP_USER_NAME", "hadoop"); 21 FileSystem fs = FileSystem.get(conf); 22 23 24 Path path = new Path("/"); 25 float avgHDFSBlocks = getHDFSAvgBlocks(fs, path); 26 System.out.println("HDFS的平均資料塊個數為:" + avgHDFSBlocks); 27 28 29 fs.close(); 30 } 31 32 /** 33 * 求出指定目錄下的所有檔案的平均資料塊個數 34 */ 35 private static float getHDFSAvgBlocks(FileSystem fs, Path path) throws Exception { 36 // TODO Auto-generated method stub 37 38 int totalFiles = 0; // 總檔案數 39 int totalBlocks = 0; // 總資料塊數 40 41 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, false); 42 43 while(listFiles.hasNext()){ 44 LocatedFileStatus next = listFiles.next(); 45 int length = next.getBlockLocations().length; 46 totalBlocks += length; 47 if(next.getLen() != 0){ 48 totalFiles++; 49 } 50 } 51 System.out.println(totalBlocks+" : "+totalFiles); 52 53 return totalBlocks * 1f / totalFiles; 54 } 55 56 }
11、編寫程式統計出HDFS檔案系統中的平均副本數(副本總數/總資料塊數)
1 import org.apache.hadoop.conf.Configuration; 2 import org.apache.hadoop.fs.FileSystem; 3 import org.apache.hadoop.fs.LocatedFileStatus; 4 import org.apache.hadoop.fs.Path; 5 import org.apache.hadoop.fs.RemoteIterator; 6 7 /** 8 * 編寫程式統計出HDFS檔案系統中的平均副本數(副本總數/總資料塊數) 9 * 比如:總共兩個檔案,一個檔案5個資料塊,每個資料塊3個副本,第二個檔案2個資料塊,每個檔案2個副本,最終的平均副本數 = (3*3 + 2*2)/(3+2)= 2.8 10 */ 11 public class Exam3_HDSFAvgBlockCopys { 12 13 public static void main(String[] args) throws Exception { 14 15 16 Configuration conf = new Configuration(); 17 conf.set("fs.defaultFS", "hdfs://hadoop02:9000"); 18 System.setProperty("HADOOP_USER_NAME", "hadoop"); 19 FileSystem fs = FileSystem.get(conf); 20 21 22 Path path = new Path("/"); 23 float avgHDFSBlockCopys = getHDFSAvgBlockCopys(fs, path); 24 System.out.println("HDFS的平均資料塊個數為:" + avgHDFSBlockCopys); 25 26 27 fs.close(); 28 } 29 30 /** 31 * 求出指定目錄下的所有檔案的平均資料塊個數 32 */ 33 private static float getHDFSAvgBlockCopys(FileSystem fs, Path path) throws Exception { 34 // TODO Auto-generated method stub 35 36 int totalCopy = 0; // 總副本數 37 int totalBlocks = 0; // 總資料塊數 38 39 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, false); 40 41 while(listFiles.hasNext()){ 42 LocatedFileStatus next = listFiles.next(); 43 44 int length = next.getBlockLocations().length; 45 short replication = next.getReplication(); 46 47 totalBlocks += length; 48 totalCopy += length * replication; 49 } 50 System.out.println(totalCopy+" : "+totalBlocks); 51 52 return totalCopy * 1f / totalBlocks; 53 } 54 55 }
12、統計HDFS整個檔案系統中的不足指定資料塊大小的資料塊的比例
1 import java.io.IOException; 2 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.fs.BlockLocation; 5 import org.apache.hadoop.fs.FileSystem; 6 import org.apache.hadoop.fs.LocatedFileStatus; 7 import org.apache.hadoop.fs.Path; 8 import org.apache.hadoop.fs.RemoteIterator; 9 10 /** 11 * 統計HDFS整個檔案系統中的不足指定資料塊大小的資料塊的比例 12 * 比如指定的資料塊大小是128M,總資料塊有100個,不是大小為完整的128M的資料塊有5個,那麼不足指定資料塊大小的資料塊的比例就為5% 13 * 注意:千萬注意考慮不同檔案的指定資料塊大小可能不一致。所以千萬不能用預設的128M一概而論 14 */ 15 public class Exam4_LTBlockSize { 16 17 public static void main(String[] args) throws Exception { 18 19 Configuration conf = new Configuration(); 20 conf.set("fs.defaultFS", "hdfs://hadoop02:9000"); 21 System.setProperty("HADOOP_USER_NAME", "hadoop"); 22 FileSystem fs = FileSystem.get(conf); 23 24 Path path = new Path("/"); 25 float avgHDFSBlockCopys = getLessThanBlocksizeBlocks(fs, path); 26 System.out.println("HDFS的不足指定資料塊大小的資料塊數目為:" + avgHDFSBlockCopys); 27 28 fs.close(); 29 } 30 31 private static float getLessThanBlocksizeBlocks(FileSystem fs, Path path) throws Exception { 32 // TODO Auto-generated method stub 33 34 int totalBlocks = 0; // 總副本數 35 int lessThenBlocksizeBlocks = 0; // 總資料塊數 36 37 RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(path, false); 38 39 while(listFiles.hasNext()){ 40 LocatedFileStatus next = listFiles.next(); 41 42 BlockLocation[] blockLocations = next.getBlockLocations(); 43 int length = blockLocations.length; 44 45 if(length != 0){ 46 totalBlocks += length; 47 long lastBlockSize = blockLocations[length - 1].getLength(); 48 long blockSize = next.getBlockSize(); 49 if(lastBlockSize < blockSize){ 50 lessThenBlocksizeBlocks++; 51 } 52 } 53 } 54 System.out.println(lessThenBlocksizeBlocks+" : "+totalBlocks); 55 56 return lessThenBlocksizeBlocks * 1f / totalBlocks; 57 } 58 }
13、統計出一個給定陣列的蓄水總量(把陣列的每個位置的數看做是地勢高低)
/**
 * Computes how much water an elevation profile can hold, where every array
 * element is the ground height at that position.
 *
 * <p>Example: {@code {4,3,2,5,6,4,4,7}} holds {@code [0,1,2,0,0,2,2,0]} units
 * per position, 7 in total.
 *
 * <p>Approach: the water above a position is bounded by the lower of the
 * highest wall on its left and the highest wall on its right. Two indices
 * move inwards from both ends while tracking those two maxima, so the array is
 * scanned once and the cost does not depend on the magnitude of the heights.
 */
public class Exam5_WaterStoreOfArray {

    public static void main(String[] args) {

        // Other sample inputs: {4,3,2,5,6,4,4,7} -> 7 and {1,2,3,4,5,6} -> 0.
        int[] intArray = new int[]{3, 1, 2, 7, 3, 8, 4, 9, 5, 6};

        int totalWater = getArrayWater(intArray);
        System.out.println(totalWater);
    }

    /**
     * Returns the total amount of water held by the given height profile.
     *
     * @param intArray ground heights; may be empty and may contain negative values
     * @return total units of water; 0 for arrays with fewer than three elements
     * @throws ArithmeticException if the total does not fit in an {@code int}
     */
    private static int getArrayWater(int[] intArray) {

        int length = intArray.length;
        if (length < 3) {
            // At least two walls and one cell between them are needed.
            return 0;
        }

        int left = 0;
        int right = length - 1;
        int leftMax = intArray[left];    // highest wall in [0, left]
        int rightMax = intArray[right];  // highest wall in [right, length - 1]
        long totalWater = 0;             // long so that the sum cannot wrap silently

        while (left < right) {
            if (leftMax <= rightMax) {
                // A wall of at least leftMax exists on the right, so leftMax alone
                // decides the water level of the next cell from the left.
                left++;
                if (intArray[left] >= leftMax) {
                    leftMax = intArray[left];
                } else {
                    totalWater += (long) leftMax - intArray[left];
                }
            } else {
                // Mirror case: rightMax decides the level of the next cell from the right.
                right--;
                if (intArray[right] >= rightMax) {
                    rightMax = intArray[right];
                } else {
                    totalWater += (long) rightMax - intArray[right];
                }
            }
        }

        if (totalWater > Integer.MAX_VALUE) {
            throw new ArithmeticException("water total does not fit in int: " + totalWater);
        }
        return (int) totalWater;
    }

    /**
     * Finds the largest value of an array.
     *
     * @param intArray values to inspect
     * @return the maximum, or 0 for an empty array
     */
    public static int findMaxValueOfArray(int[] intArray) {
        if (intArray.length == 0) {
            return 0;
        }
        int max = intArray[0];
        for (int i = 1; i < intArray.length; i++) {
            if (intArray[i] > max) {
                max = intArray[i];
            }
        }
        return max;
    }

    /**
     * Finds the smallest value of an array.
     *
     * @param intArray values to inspect
     * @return the minimum, or 0 for an empty array
     */
    public static int findMinValueOfArray(int[] intArray) {
        if (intArray.length == 0) {
            return 0;
        }
        int min = intArray[0];
        for (int i = 1; i < intArray.length; i++) {
            if (intArray[i] < min) {
                min = intArray[i];
            }
        }
        return min;
    }
}