2016-12-21 19:30:02 -05:00

20415 lines
548 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From acaf2eec0ec9559f47c08163cef938ae95d5db92 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 3 Mar 2015 02:18:01 -0600
Subject: [PATCH] fs: f2fs: bring up to date with Jaegeuk's branch
Change-Id: Id26978e9f0bfb4878a6aeeca2999162553a037f0
---
Documentation/ABI/testing/sysfs-fs-f2fs | 82 +
Documentation/filesystems/00-INDEX | 2 +
Documentation/filesystems/f2fs.txt | 570 ++++++
MAINTAINERS | 12 +
arch/arm/configs/cyanogenmod_bacon_defconfig | 6 +
arch/arm/configs/cyanogenmod_find7_defconfig | 6 +
fs/Kconfig | 1 +
fs/Makefile | 1 +
fs/f2fs/Kconfig | 83 +
fs/f2fs/Makefile | 8 +
fs/f2fs/acl.c | 403 +++++
fs/f2fs/acl.h | 59 +
fs/f2fs/checkpoint.c | 1136 ++++++++++++
fs/f2fs/data.c | 1253 ++++++++++++++
fs/f2fs/debug.c | 401 +++++
fs/f2fs/dir.c | 805 +++++++++
fs/f2fs/f2fs.h | 1711 ++++++++++++++++++
fs/f2fs/file.c | 1175 +++++++++++++
fs/f2fs/gc.c | 746 ++++++++
fs/f2fs/gc.h | 110 ++
fs/f2fs/hash.c | 104 ++
fs/f2fs/inline.c | 527 ++++++
fs/f2fs/inode.c | 361 ++++
fs/f2fs/namei.c | 545 ++++++
fs/f2fs/node.c | 2072 ++++++++++++++++++++++
fs/f2fs/node.h | 415 +++++
fs/f2fs/recovery.c | 563 ++++++
fs/f2fs/segment.c | 2386 ++++++++++++++++++++++++++
fs/f2fs/segment.h | 750 ++++++++
fs/f2fs/super.c | 1324 ++++++++++++++
fs/f2fs/trace.c | 159 ++
fs/f2fs/trace.h | 46 +
fs/f2fs/xattr.c | 615 +++++++
fs/f2fs/xattr.h | 154 ++
include/linux/f2fs_fs.h | 475 +++++
include/linux/magic.h | 1 +
include/trace/events/f2fs.h | 1014 +++++++++++
37 files changed, 20081 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-fs-f2fs
create mode 100644 Documentation/filesystems/f2fs.txt
create mode 100644 fs/f2fs/Kconfig
create mode 100644 fs/f2fs/Makefile
create mode 100644 fs/f2fs/acl.c
create mode 100644 fs/f2fs/acl.h
create mode 100644 fs/f2fs/checkpoint.c
create mode 100644 fs/f2fs/data.c
create mode 100644 fs/f2fs/debug.c
create mode 100644 fs/f2fs/dir.c
create mode 100644 fs/f2fs/f2fs.h
create mode 100644 fs/f2fs/file.c
create mode 100644 fs/f2fs/gc.c
create mode 100644 fs/f2fs/gc.h
create mode 100644 fs/f2fs/hash.c
create mode 100644 fs/f2fs/inline.c
create mode 100644 fs/f2fs/inode.c
create mode 100644 fs/f2fs/namei.c
create mode 100644 fs/f2fs/node.c
create mode 100644 fs/f2fs/node.h
create mode 100644 fs/f2fs/recovery.c
create mode 100644 fs/f2fs/segment.c
create mode 100644 fs/f2fs/segment.h
create mode 100644 fs/f2fs/super.c
create mode 100644 fs/f2fs/trace.c
create mode 100644 fs/f2fs/trace.h
create mode 100644 fs/f2fs/xattr.c
create mode 100644 fs/f2fs/xattr.h
create mode 100644 include/linux/f2fs_fs.h
create mode 100644 include/trace/events/f2fs.h
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
new file mode 100644
index 0000000..2c4cc42
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -0,0 +1,82 @@
+What: /sys/fs/f2fs/<disk>/gc_max_sleep_time
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the maximun sleep time for gc_thread. Time
+ is in milliseconds.
+
+What: /sys/fs/f2fs/<disk>/gc_min_sleep_time
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the minimum sleep time for gc_thread. Time
+ is in milliseconds.
+
+What: /sys/fs/f2fs/<disk>/gc_no_gc_sleep_time
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the default sleep time for gc_thread. Time
+ is in milliseconds.
+
+What: /sys/fs/f2fs/<disk>/gc_idle
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the victim selection policy for garbage collection.
+
+What: /sys/fs/f2fs/<disk>/reclaim_segments
+Date: October 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the issue rate of segment discard commands.
+
+What: /sys/fs/f2fs/<disk>/ipu_policy
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the in-place-update policy.
+
+What: /sys/fs/f2fs/<disk>/min_ipu_util
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the FS utilization condition for the in-place-update
+ policies.
+
+What: /sys/fs/f2fs/<disk>/min_fsync_blocks
+Date: September 2014
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the dirty page count condition for the in-place-update
+ policies.
+
+What: /sys/fs/f2fs/<disk>/max_small_discards
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the issue rate of small discard commands.
+
+What: /sys/fs/f2fs/<disk>/max_victim_search
+Date: January 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the number of trials to find a victim segment.
+
+What: /sys/fs/f2fs/<disk>/dir_level
+Date: March 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the directory level for large directory.
+
+What: /sys/fs/f2fs/<disk>/ram_thresh
+Date: March 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the memory footprint used by f2fs.
+
+What: /sys/fs/f2fs/<disk>/trim_sections
+Date: February 2015
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the trimming rate in batch mode.
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 8c624a1..bc25282 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -46,6 +46,8 @@ ext3.txt
- info, mount options and specifications for the Ext3 filesystem.
ext4.txt
- info, mount options and specifications for the Ext4 filesystem.
+f2fs.txt
+ - info and mount options for the F2FS filesystem.
files.txt
- info on file management in the Linux kernel.
fuse.txt
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
new file mode 100644
index 0000000..dac11d7
--- /dev/null
+++ b/Documentation/filesystems/f2fs.txt
@@ -0,0 +1,570 @@
+================================================================================
+WHAT IS Flash-Friendly File System (F2FS)?
+================================================================================
+
+NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
+been equipped on a variety systems ranging from mobile to server systems. Since
+they are known to have different characteristics from the conventional rotating
+disks, a file system, an upper layer to the storage device, should adapt to the
+changes from the sketch in the design level.
+
+F2FS is a file system exploiting NAND flash memory-based storage devices, which
+is based on Log-structured File System (LFS). The design has been focused on
+addressing the fundamental issues in LFS, which are snowball effect of wandering
+tree and high cleaning overhead.
+
+Since a NAND flash memory-based storage device shows different characteristic
+according to its internal geometry or flash memory management scheme, namely FTL,
+F2FS and its tools support various parameters not only for configuring on-disk
+layout, but also for selecting allocation and cleaning algorithms.
+
+The following git tree provides the file system formatting tool (mkfs.f2fs),
+a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
+>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
+
+For reporting bugs and sending patches, please use the following mailing list:
+>> linux-f2fs-devel@lists.sourceforge.net
+
+================================================================================
+BACKGROUND AND DESIGN ISSUES
+================================================================================
+
+Log-structured File System (LFS)
+--------------------------------
+"A log-structured file system writes all modifications to disk sequentially in
+a log-like structure, thereby speeding up both file writing and crash recovery.
+The log is the only structure on disk; it contains indexing information so that
+files can be read back from the log efficiently. In order to maintain large free
+areas on disk for fast writing, we divide the log into segments and use a
+segment cleaner to compress the live information from heavily fragmented
+segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and
+implementation of a log-structured file system", ACM Trans. Computer Systems
+10, 1, 2652.
+
+Wandering Tree Problem
+----------------------
+In LFS, when a file data is updated and written to the end of log, its direct
+pointer block is updated due to the changed location. Then the indirect pointer
+block is also updated due to the direct pointer block update. In this manner,
+the upper index structures such as inode, inode map, and checkpoint block are
+also updated recursively. This problem is called as wandering tree problem [1],
+and in order to enhance the performance, it should eliminate or relax the update
+propagation as much as possible.
+
+[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/
+
+Cleaning Overhead
+-----------------
+Since LFS is based on out-of-place writes, it produces so many obsolete blocks
+scattered across the whole storage. In order to serve new empty log space, it
+needs to reclaim these obsolete blocks seamlessly to users. This job is called
+as a cleaning process.
+
+The process consists of three operations as follows.
+1. A victim segment is selected through referencing segment usage table.
+2. It loads parent index structures of all the data in the victim identified by
+ segment summary blocks.
+3. It checks the cross-reference between the data and its parent index structure.
+4. It moves valid data selectively.
+
+This cleaning job may cause unexpected long delays, so the most important goal
+is to hide the latencies to users. And also definitely, it should reduce the
+amount of valid data to be moved, and move them quickly as well.
+
+================================================================================
+KEY FEATURES
+================================================================================
+
+Flash Awareness
+---------------
+- Enlarge the random write area for better performance, but provide the high
+ spatial locality
+- Align FS data structures to the operational units in FTL as best efforts
+
+Wandering Tree Problem
+----------------------
+- Use a term, “node”, that represents inodes as well as various pointer blocks
+- Introduce Node Address Table (NAT) containing the locations of all the “node”
+ blocks; this will cut off the update propagation.
+
+Cleaning Overhead
+-----------------
+- Support a background cleaning process
+- Support greedy and cost-benefit algorithms for victim selection policies
+- Support multi-head logs for static/dynamic hot and cold data separation
+- Introduce adaptive logging for efficient block allocation
+
+================================================================================
+MOUNT OPTIONS
+================================================================================
+
+background_gc=%s Turn on/off cleaning operations, namely garbage
+ collection, triggered in background when I/O subsystem is
+ idle. If background_gc=on, it will turn on the garbage
+ collection and if background_gc=off, garbage collection
+ will be truned off.
+ Default value for this option is on. So garbage
+ collection is on by default.
+disable_roll_forward Disable the roll-forward recovery routine
+norecovery Disable the roll-forward recovery routine, mounted read-
+ only (i.e., -o ro,disable_roll_forward)
+discard Issue discard/TRIM commands when a segment is cleaned.
+no_heap Disable heap-style segment allocation which finds free
+ segments for data from the beginning of main area, while
+ for node from the end of main area.
+nouser_xattr Disable Extended User Attributes. Note: xattr is enabled
+ by default if CONFIG_F2FS_FS_XATTR is selected.
+noacl Disable POSIX Access Control List. Note: acl is enabled
+ by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
+active_logs=%u Support configuring the number of active logs. In the
+ current design, f2fs supports only 2, 4, and 6 logs.
+ Default number is 6.
+disable_ext_identify Disable the extension list configured by mkfs, so f2fs
+ does not aware of cold files such as media files.
+inline_xattr Enable the inline xattrs feature.
+inline_data Enable the inline data feature: New created small(<~3.4k)
+ files can be written into inode block.
+inline_dentry Enable the inline dir feature: data in new created
+ directory entries can be written into inode block. The
+ space of inode block which is used to store inline
+ dentries is limited to ~3.4k.
+flush_merge Merge concurrent cache_flush commands as much as possible
+ to eliminate redundant command issues. If the underlying
+ device handles the cache_flush command relatively slowly,
+ recommend to enable this option.
+nobarrier This option can be used if underlying storage guarantees
+ its cached data should be written to the novolatile area.
+ If this option is set, no cache_flush commands are issued
+ but f2fs still guarantees the write ordering of all the
+ data writes.
+fastboot This option is used when a system wants to reduce mount
+ time as much as possible, even though normal performance
+ can be sacrificed.
+
+================================================================================
+DEBUGFS ENTRIES
+================================================================================
+
+/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as
+f2fs. Each file shows the whole f2fs information.
+
+/sys/kernel/debug/f2fs/status includes:
+ - major file system information managed by f2fs currently
+ - average SIT information about whole segments
+ - current memory footprint consumed by f2fs.
+
+================================================================================
+SYSFS ENTRIES
+================================================================================
+
+Information about mounted f2f2 file systems can be found in
+/sys/fs/f2fs. Each mounted filesystem will have a directory in
+/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda).
+The files in each per-device directory are shown in table below.
+
+Files in /sys/fs/f2fs/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-f2fs)
+..............................................................................
+ File Content
+
+ gc_max_sleep_time This tuning parameter controls the maximum sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_min_sleep_time This tuning parameter controls the minimum sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_no_gc_sleep_time This tuning parameter controls the default sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_idle This parameter controls the selection of victim
+ policy for garbage collection. Setting gc_idle = 0
+ (default) will disable this option. Setting
+ gc_idle = 1 will select the Cost Benefit approach
+ & setting gc_idle = 2 will select the greedy aproach.
+
+ reclaim_segments This parameter controls the number of prefree
+ segments to be reclaimed. If the number of prefree
+ segments is larger than the number of segments
+ in the proportion to the percentage over total
+ volume size, f2fs tries to conduct checkpoint to
+ reclaim the prefree segments to free segments.
+ By default, 5% over total # of segments.
+
+ max_small_discards This parameter controls the number of discard
+ commands that consist small blocks less than 2MB.
+ The candidates to be discarded are cached until
+ checkpoint is triggered, and issued during the
+ checkpoint. By default, it is disabled with 0.
+
+ trim_sections This parameter controls the number of sections
+ to be trimmed out in batch mode when FITRIM
+ conducts. 32 sections is set by default.
+
+ ipu_policy This parameter controls the policy of in-place
+ updates in f2fs. There are five policies:
+ 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,
+ 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL,
+ 0x10: F2FS_IPU_FSYNC.
+
+ min_ipu_util This parameter controls the threshold to trigger
+ in-place-updates. The number indicates percentage
+ of the filesystem utilization, and used by
+ F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
+
+ min_fsync_blocks This parameter controls the threshold to trigger
+ in-place-updates when F2FS_IPU_FSYNC mode is set.
+ The number indicates the number of dirty pages
+ when fsync needs to flush on its call path. If
+ the number is less than this value, it triggers
+ in-place-updates.
+
+ max_victim_search This parameter controls the number of trials to
+ find a victim segment when conducting SSR and
+ cleaning operations. The default value is 4096
+ which covers 8GB block address range.
+
+ dir_level This parameter controls the directory level to
+ support large directory. If a directory has a
+ number of files, it can reduce the file lookup
+ latency by increasing this dir_level value.
+ Otherwise, it needs to decrease this value to
+ reduce the space overhead. The default value is 0.
+
+ ram_thresh This parameter controls the memory footprint used
+ by free nids and cached nat entries. By default,
+ 10 is set, which indicates 10 MB / 1 GB RAM.
+
+================================================================================
+USAGE
+================================================================================
+
+1. Download userland tools and compile them.
+
+2. Skip, if f2fs was compiled statically inside kernel.
+ Otherwise, insert the f2fs.ko module.
+ # insmod f2fs.ko
+
+3. Create a directory trying to mount
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
+
+mkfs.f2fs
+---------
+The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem,
+which builds a basic on-disk layout.
+
+The options consist of:
+-l [label] : Give a volume label, up to 512 unicode name.
+-a [0 or 1] : Split start location of each area for heap-based allocation.
+ 1 is set by default, which performs this.
+-o [int] : Set overprovision ratio in percent over volume size.
+ 5 is set by default.
+-s [int] : Set the number of segments per section.
+ 1 is set by default.
+-z [int] : Set the number of sections per zone.
+ 1 is set by default.
+-e [str] : Set basic extension list. e.g. "mp3,gif,mov"
+-t [0 or 1] : Disable discard command or not.
+ 1 is set by default, which conducts discard.
+
+fsck.f2fs
+---------
+The fsck.f2fs is a tool to check the consistency of an f2fs-formatted
+partition, which examines whether the filesystem metadata and user-made data
+are cross-referenced correctly or not.
+Note that, initial version of the tool does not fix any inconsistency.
+
+The options consist of:
+ -d debug level [default:0]
+
+dump.f2fs
+---------
+The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
+file. Each file is dump_ssa and dump_sit.
+
+The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
+It shows on-disk inode information reconized by a given inode number, and is
+able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
+./dump_sit respectively.
+
+The options consist of:
+ -d debug level [default:0]
+ -i inode no (hex)
+ -s [SIT dump segno from #1~#2 (decimal), for all 0~-1]
+ -a [SSA dump segno from #1~#2 (decimal), for all 0~-1]
+
+Examples:
+# dump.f2fs -i [ino] /dev/sdx
+# dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
+# dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+
+================================================================================
+DESIGN
+================================================================================
+
+On-disk Layout
+--------------
+
+F2FS divides the whole volume into a number of segments, each of which is fixed
+to 2MB in size. A section is composed of consecutive segments, and a zone
+consists of a set of sections. By default, section and zone sizes are set to one
+segment size identically, but users can easily modify the sizes by mkfs.
+
+F2FS splits the entire volume into six areas, and all the areas except superblock
+consists of multiple segments as described below.
+
+ align with the zone size <-|
+ |-> align with the segment size
+ _________________________________________________________________________
+ | | | Segment | Node | Segment | |
+ | Superblock | Checkpoint | Info. | Address | Summary | Main |
+ | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | |
+ |____________|_____2______|______N______|______N______|______N_____|__N___|
+ . .
+ . .
+ . .
+ ._________________________________________.
+ |_Segment_|_..._|_Segment_|_..._|_Segment_|
+ . .
+ ._________._________
+ |_section_|__...__|_
+ . .
+ .________.
+ |__zone__|
+
+- Superblock (SB)
+ : It is located at the beginning of the partition, and there exist two copies
+ to avoid file system crash. It contains basic partition information and some
+ default parameters of f2fs.
+
+- Checkpoint (CP)
+ : It contains file system information, bitmaps for valid NAT/SIT sets, orphan
+ inode lists, and summary entries of current active segments.
+
+- Segment Information Table (SIT)
+ : It contains segment information such as valid block count and bitmap for the
+ validity of all the blocks.
+
+- Node Address Table (NAT)
+ : It is composed of a block address table for all the node blocks stored in
+ Main area.
+
+- Segment Summary Area (SSA)
+ : It contains summary entries which contains the owner information of all the
+ data and node blocks stored in Main area.
+
+- Main Area
+ : It contains file and directory data including their indices.
+
+In order to avoid misalignment between file system and flash-based storage, F2FS
+aligns the start block address of CP with the segment size. Also, it aligns the
+start block address of Main area with the zone size by reserving some segments
+in SSA area.
+
+Reference the following survey for additional technical details.
+https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey
+
+File System Metadata Structure
+------------------------------
+
+F2FS adopts the checkpointing scheme to maintain file system consistency. At
+mount time, F2FS first tries to find the last valid checkpoint data by scanning
+CP area. In order to reduce the scanning time, F2FS uses only two copies of CP.
+One of them always indicates the last valid data, which is called as shadow copy
+mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism.
+
+For file system consistency, each CP points to which NAT and SIT copies are
+valid, as shown as below.
+
+ +--------+----------+---------+
+ | CP | SIT | NAT |
+ +--------+----------+---------+
+ . . . .
+ . . . .
+ . . . .
+ +-------+-------+--------+--------+--------+--------+
+ | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 |
+ +-------+-------+--------+--------+--------+--------+
+ | ^ ^
+ | | |
+ `----------------------------------------'
+
+Index Structure
+---------------
+
+The key data structure to manage the data locations is a "node". Similar to
+traditional file structures, F2FS has three types of node: inode, direct node,
+indirect node. F2FS assigns 4KB to an inode block which contains 923 data block
+indices, two direct node pointers, two indirect node pointers, and one double
+indirect node pointer as described below. One direct node block contains 1018
+data blocks, and one indirect node block contains also 1018 node blocks. Thus,
+one inode block (i.e., a file) covers:
+
+ 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB.
+
+ Inode block (4KB)
+ |- data (923)
+ |- direct node (2)
+ | `- data (1018)
+ |- indirect node (2)
+ | `- direct node (1018)
+ | `- data (1018)
+ `- double indirect node (1)
+ `- indirect node (1018)
+ `- direct node (1018)
+ `- data (1018)
+
+Note that, all the node blocks are mapped by NAT which means the location of
+each node is translated by the NAT table. In the consideration of the wandering
+tree problem, F2FS is able to cut off the propagation of node updates caused by
+leaf data writes.
+
+Directory Structure
+-------------------
+
+A directory entry occupies 11 bytes, which consists of the following attributes.
+
+- hash hash value of the file name
+- ino inode number
+- len the length of file name
+- type file type such as directory, symlink, etc
+
+A dentry block consists of 214 dentry slots and file names. Therein a bitmap is
+used to represent whether each dentry is valid or not. A dentry block occupies
+4KB with the following composition.
+
+ Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) +
+ dentries(11 * 214 bytes) + file name (8 * 214 bytes)
+
+ [Bucket]
+ +--------------------------------+
+ |dentry block 1 | dentry block 2 |
+ +--------------------------------+
+ . .
+ . .
+ . [Dentry Block Structure: 4KB] .
+ +--------+----------+----------+------------+
+ | bitmap | reserved | dentries | file names |
+ +--------+----------+----------+------------+
+ [Dentry Block: 4KB] . .
+ . .
+ . .
+ +------+------+-----+------+
+ | hash | ino | len | type |
+ +------+------+-----+------+
+ [Dentry Structure: 11 bytes]
+
+F2FS implements multi-level hash tables for directory structure. Each level has
+a hash table with dedicated number of hash buckets as shown below. Note that
+"A(2B)" means a bucket includes 2 data blocks.
+
+----------------------
+A : bucket
+B : block
+N : MAX_DIR_HASH_DEPTH
+----------------------
+
+level #0 | A(2B)
+ |
+level #1 | A(2B) - A(2B)
+ |
+level #2 | A(2B) - A(2B) - A(2B) - A(2B)
+ . | . . . .
+level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
+ . | . . . .
+level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
+
+The number of blocks and buckets are determined by,
+
+ ,- 2, if n < MAX_DIR_HASH_DEPTH / 2,
+ # of blocks in level #n = |
+ `- 4, Otherwise
+
+ ,- 2^(n + dir_level),
+ | if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
+ # of buckets in level #n = |
+ `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+ Otherwise
+
+When F2FS finds a file name in a directory, at first a hash value of the file
+name is calculated. Then, F2FS scans the hash table in level #0 to find the
+dentry consisting of the file name and its inode number. If not found, F2FS
+scans the next hash table in level #1. In this way, F2FS scans hash tables in
+each levels incrementally from 1 to N. In each levels F2FS needs to scan only
+one bucket determined by the following equation, which shows O(log(# of files))
+complexity.
+
+ bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
+
+In the case of file creation, F2FS finds empty consecutive slots that cover the
+file name. F2FS searches the empty slots in the hash tables of whole levels from
+1 to N in the same way as the lookup operation.
+
+The following figure shows an example of two cases holding children.
+ --------------> Dir <--------------
+ | |
+ child child
+
+ child - child [hole] - child
+
+ child - child - child [hole] - [hole] - child
+
+ Case 1: Case 2:
+ Number of children = 6, Number of children = 3,
+ File size = 7 File size = 7
+
+Default Block Allocation
+------------------------
+
+At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node
+and Hot/Warm/Cold data.
+
+- Hot node contains direct node blocks of directories.
+- Warm node contains direct node blocks except hot node blocks.
+- Cold node contains indirect node blocks
+- Hot data contains dentry blocks
+- Warm data contains data blocks except hot and cold data blocks
+- Cold data contains multimedia data or migrated data blocks
+
+LFS has two schemes for free space management: threaded log and copy-and-compac-
+tion. The copy-and-compaction scheme which is known as cleaning, is well-suited
+for devices showing very good sequential write performance, since free segments
+are served all the time for writing new data. However, it suffers from cleaning
+overhead under high utilization. Contrarily, the threaded log scheme suffers
+from random writes, but no cleaning process is needed. F2FS adopts a hybrid
+scheme where the copy-and-compaction scheme is adopted by default, but the
+policy is dynamically changed to the threaded log scheme according to the file
+system status.
+
+In order to align F2FS with underlying flash-based storage, F2FS allocates a
+segment in a unit of section. F2FS expects that the section size would be the
+same as the unit size of garbage collection in FTL. Furthermore, with respect
+to the mapping granularity in FTL, F2FS allocates each section of the active
+logs from different zones as much as possible, since FTL can write the data in
+the active logs into one allocation unit according to its mapping granularity.
+
+Cleaning process
+----------------
+
+F2FS does cleaning both on demand and in the background. On-demand cleaning is
+triggered when there are not enough free segments to serve VFS calls. Background
+cleaner is operated by a kernel thread, and triggers the cleaning job when the
+system is idle.
+
+F2FS supports two victim selection policies: greedy and cost-benefit algorithms.
+In the greedy algorithm, F2FS selects a victim segment having the smallest number
+of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment
+according to the segment age and the number of valid blocks in order to address
+log block thrashing problem in the greedy algorithm. F2FS adopts the greedy
+algorithm for on-demand cleaner, while background cleaner adopts cost-benefit
+algorithm.
+
+In order to identify whether the data in the victim segment are valid or not,
+F2FS manages a bitmap. Each bit represents the validity of a block, and the
+bitmap is composed of a bit stream covering whole blocks in main area.
diff --git a/MAINTAINERS b/MAINTAINERS
index 0ca8705..d1907f4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2883,6 +2883,18 @@ F: Documentation/filesystems/caching/
F: fs/fscache/
F: include/linux/fscache*.h
+F2FS FILE SYSTEM
+M:» Jaegeuk Kim <jaegeuk@kernel.org>
+M:» Changman Lee <cm224.lee@samsung.com>
+L:» linux-f2fs-devel@lists.sourceforge.net
+W:» http://en.wikipedia.org/wiki/F2FS
+T:» git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
+S:» Maintained
+F:» Documentation/filesystems/f2fs.txt
+F:» Documentation/ABI/testing/sysfs-fs-f2fs
+F:» fs/f2fs/
+F:» include/linux/f2fs_fs.h
+
FUJITSU FR-V (FRV) PORT
M: David Howells <dhowells@redhat.com>
S: Maintained
diff --git a/arch/arm/configs/cyanogenmod_bacon_defconfig b/arch/arm/configs/cyanogenmod_bacon_defconfig
index 4526d39..00d696b 100644
--- a/arch/arm/configs/cyanogenmod_bacon_defconfig
+++ b/arch/arm/configs/cyanogenmod_bacon_defconfig
@@ -3241,6 +3241,12 @@ CONFIG_ECRYPT_FS=y
# CONFIG_PSTORE is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
+CONFIG_F2FS_FS=y
+CONFIG_F2FS_STAT_FS=y
+CONFIG_F2FS_FS_XATTR=y
+CONFIG_F2FS_FS_POSIX_ACL=y
+CONFIG_F2FS_FS_SECURITY=y
+# CONFIG_F2FS_CHECK_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
# CONFIG_NFS_FS is not set
# CONFIG_NFSD is not set
diff --git a/arch/arm/configs/cyanogenmod_find7_defconfig b/arch/arm/configs/cyanogenmod_find7_defconfig
index 7da04ab..e0a52fd 100644
--- a/arch/arm/configs/cyanogenmod_find7_defconfig
+++ b/arch/arm/configs/cyanogenmod_find7_defconfig
@@ -3241,6 +3241,12 @@ CONFIG_ECRYPT_FS=y
# CONFIG_PSTORE is not set
# CONFIG_SYSV_FS is not set
# CONFIG_UFS_FS is not set
+CONFIG_F2FS_FS=y
+CONFIG_F2FS_STAT_FS=y
+CONFIG_F2FS_FS_XATTR=y
+CONFIG_F2FS_FS_POSIX_ACL=y
+CONFIG_F2FS_FS_SECURITY=y
+# CONFIG_F2FS_CHECK_FS is not set
CONFIG_NETWORK_FILESYSTEMS=y
# CONFIG_NFS_FS is not set
# CONFIG_NFSD is not set
diff --git a/fs/Kconfig b/fs/Kconfig
index acea412..d71032d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -224,6 +224,7 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
+source "fs/f2fs/Kconfig"
endif # MISC_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
index 95cf9de6..b765459 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -122,6 +122,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
+obj-$(CONFIG_F2FS_FS) += f2fs/
obj-y += exofs/ # Multiple modules
obj-$(CONFIG_CEPH_FS) += ceph/
obj-$(CONFIG_PSTORE) += pstore/
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
new file mode 100644
index 0000000..94e2d2f
--- /dev/null
+++ b/fs/f2fs/Kconfig
@@ -0,0 +1,83 @@
+config F2FS_FS
+ tristate "F2FS filesystem support (EXPERIMENTAL)"
+ depends on BLOCK
+ help
+ F2FS is based on Log-structured File System (LFS), which supports
+ versatile "flash-friendly" features. The design has been focused on
+ addressing the fundamental issues in LFS, which are snowball effect
+ of wandering tree and high cleaning overhead.
+
+ Since flash-based storages show different characteristics according to
+ the internal geometry or flash memory management schemes aka FTL, F2FS
+ and tools support various parameters not only for configuring on-disk
+ layout, but also for selecting allocation and cleaning algorithms.
+
+ If unsure, say N.
+
+config F2FS_STAT_FS
+ bool "F2FS Status Information"
+ depends on F2FS_FS && DEBUG_FS
+ default y
+ help
+ /sys/kernel/debug/f2fs/ contains information about all the partitions
+ mounted as f2fs. Each file shows the whole f2fs information.
+
+ /sys/kernel/debug/f2fs/status includes:
+ - major filesystem information managed by f2fs currently
+ - average SIT information about whole segments
+ - current memory footprint consumed by f2fs.
+
+config F2FS_FS_XATTR
+ bool "F2FS extended attributes"
+ depends on F2FS_FS
+ default y
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+config F2FS_FS_POSIX_ACL
+ bool "F2FS Access Control Lists"
+ depends on F2FS_FS_XATTR
+ select FS_POSIX_ACL
+ default y
+ help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ gourps beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
+
+config F2FS_FS_SECURITY
+ bool "F2FS Security Labels"
+ depends on F2FS_FS_XATTR
+ help
+ Security labels provide an access control facility to support Linux
+ Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
+ Linux. This option enables an extended attribute handler for file
+ security labels in the f2fs filesystem, so that it requires enabling
+ the extended attribute support in advance.
+
+ If you are not using a security module, say N.
+
+config F2FS_CHECK_FS
+ bool "F2FS consistency checking feature"
+ depends on F2FS_FS
+ help
+ Enables BUG_ONs which check the filesystem consistency in runtime.
+
+ If you want to improve the performance, say N.
+
+config F2FS_IO_TRACE
+ bool "F2FS IO tracer"
+ depends on F2FS_FS
+ depends on FUNCTION_TRACER
+ help
+ F2FS IO trace is based on a function trace, which gathers process
+ information and block IO patterns in the filesystem level.
+
+ If unsure, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
new file mode 100644
index 0000000..d923977
--- /dev/null
+++ b/fs/f2fs/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_F2FS_FS) += f2fs.o
+
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
+f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
+f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
+f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
+f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
+f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
new file mode 100644
index 0000000..df1a307
--- /dev/null
+++ b/fs/f2fs/acl.c
@@ -0,0 +1,403 @@
+/*
+ * fs/f2fs/acl.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "xattr.h"
+#include "acl.h"
+
+static inline size_t f2fs_acl_size(int count)
+{
+ if (count <= 4) {
+ return sizeof(struct f2fs_acl_header) +
+ count * sizeof(struct f2fs_acl_entry_short);
+ } else {
+ return sizeof(struct f2fs_acl_header) +
+ 4 * sizeof(struct f2fs_acl_entry_short) +
+ (count - 4) * sizeof(struct f2fs_acl_entry);
+ }
+}
+
+static inline int f2fs_acl_count(size_t size)
+{
+ ssize_t s;
+ size -= sizeof(struct f2fs_acl_header);
+ s = size - 4 * sizeof(struct f2fs_acl_entry_short);
+ if (s < 0) {
+ if (size % sizeof(struct f2fs_acl_entry_short))
+ return -1;
+ return size / sizeof(struct f2fs_acl_entry_short);
+ } else {
+ if (s % sizeof(struct f2fs_acl_entry))
+ return -1;
+ return s / sizeof(struct f2fs_acl_entry) + 4;
+ }
+}
+
+static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
+{
+ int i, count;
+ struct posix_acl *acl;
+ struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value;
+ struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
+ const char *end = value + size;
+
+ if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
+ return ERR_PTR(-EINVAL);
+
+ count = f2fs_acl_count(size);
+ if (count < 0)
+ return ERR_PTR(-EINVAL);
+ if (count == 0)
+ return NULL;
+
+ acl = posix_acl_alloc(count, GFP_NOFS);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+
+ if ((char *)entry > end)
+ goto fail;
+
+ acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag);
+ acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm);
+
+ switch (acl->a_entries[i].e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ entry = (struct f2fs_acl_entry *)((char *)entry +
+ sizeof(struct f2fs_acl_entry_short));
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ acl->a_entries[i].e_id = le32_to_cpu(entry->e_id);
+ entry = (struct f2fs_acl_entry *)((char *)entry +
+ sizeof(struct f2fs_acl_entry));
+ break;
+ default:
+ goto fail;
+ }
+ }
+ if ((char *)entry != end)
+ goto fail;
+ return acl;
+fail:
+ posix_acl_release(acl);
+ return ERR_PTR(-EINVAL);
+}
+
+static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+ struct f2fs_acl_header *f2fs_acl;
+ struct f2fs_acl_entry *entry;
+ int i;
+
+ f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
+ sizeof(struct f2fs_acl_entry), GFP_NOFS);
+ if (!f2fs_acl)
+ return ERR_PTR(-ENOMEM);
+
+ f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION);
+ entry = (struct f2fs_acl_entry *)(f2fs_acl + 1);
+
+ for (i = 0; i < acl->a_count; i++) {
+
+ entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag);
+ entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
+
+ switch (acl->a_entries[i].e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ entry->e_id = cpu_to_le32(acl->a_entries[i].e_id);
+ entry = (struct f2fs_acl_entry *)((char *)entry +
+ sizeof(struct f2fs_acl_entry));
+ break;
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ entry = (struct f2fs_acl_entry *)((char *)entry +
+ sizeof(struct f2fs_acl_entry_short));
+ break;
+ default:
+ goto fail;
+ }
+ }
+ *size = f2fs_acl_size(acl->a_count);
+ return (void *)f2fs_acl;
+
+fail:
+ kfree(f2fs_acl);
+ return ERR_PTR(-EINVAL);
+}
+
+static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
+ struct page *dpage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ void *value = NULL;
+ struct posix_acl *acl;
+ int retval;
+
+ if (!test_opt(sbi, POSIX_ACL))
+ return NULL;
+
+ acl = get_cached_acl(inode, type);
+ if (acl != ACL_NOT_CACHED)
+ return acl;
+
+ if (type == ACL_TYPE_ACCESS)
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
+
+ retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
+ if (retval > 0) {
+ value = kmalloc(retval, GFP_F2FS_ZERO);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ retval = f2fs_getxattr(inode, name_index, "", value,
+ retval, dpage);
+ }
+
+ if (retval > 0)
+ acl = f2fs_acl_from_disk(value, retval);
+ else if (retval == -ENODATA)
+ acl = NULL;
+ else
+ acl = ERR_PTR(retval);
+ kfree(value);
+
+ if (!IS_ERR(acl))
+ set_cached_acl(inode, type, acl);
+
+ return acl;
+}
+
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+{
+ return __f2fs_get_acl(inode, type, NULL);
+}
+
+static int f2fs_set_acl(struct inode *inode, int type,
+ struct posix_acl *acl, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ int name_index;
+ void *value = NULL;
+ size_t size = 0;
+ int error;
+
+ if (!test_opt(sbi, POSIX_ACL))
+ return 0;
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ error = posix_acl_equiv_mode(acl, &inode->i_mode);
+ if (error < 0)
+ return error;
+ set_acl_inode(fi, inode->i_mode);
+ if (error == 0)
+ acl = NULL;
+ }
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ value = f2fs_acl_to_disk(acl, &size);
+ if (IS_ERR(value)) {
+ clear_inode_flag(fi, FI_ACL_MODE);
+ return (int)PTR_ERR(value);
+ }
+ }
+
+ error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
+
+ kfree(value);
+ if (!error)
+ set_cached_acl(inode, type, acl);
+
+ clear_inode_flag(fi, FI_ACL_MODE);
+ return error;
+}
+
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
+ struct page *dpage)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct posix_acl *acl = NULL;
+ int error = 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (test_opt(sbi, POSIX_ACL)) {
+ acl = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (!acl)
+ inode->i_mode &= ~current_umask();
+ }
+
+ if (!test_opt(sbi, POSIX_ACL) || !acl)
+ goto cleanup;
+
+ if (S_ISDIR(inode->i_mode)) {
+ error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage);
+ if (error)
+ goto cleanup;
+ }
+ error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
+ if (error < 0)
+ return error;
+ if (error > 0)
+ error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage);
+cleanup:
+ posix_acl_release(acl);
+ return error;
+}
+
+int f2fs_acl_chmod(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int error;
+ umode_t mode = get_inode_mode(inode);
+
+ if (!test_opt(sbi, POSIX_ACL))
+ return 0;
+ if (S_ISLNK(mode))
+ return -EOPNOTSUPP;
+
+ acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+
+ error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
+ if (error)
+ return error;
+
+ error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL);
+ posix_acl_release(acl);
+ return error;
+}
+
+static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ const char *xname = POSIX_ACL_XATTR_DEFAULT;
+ size_t size;
+
+ if (!test_opt(sbi, POSIX_ACL))
+ return 0;
+
+ if (type == ACL_TYPE_ACCESS)
+ xname = POSIX_ACL_XATTR_ACCESS;
+
+ size = strlen(xname) + 1;
+ if (list && size <= list_size)
+ memcpy(list, xname, size);
+ return size;
+}
+
+static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ struct posix_acl *acl;
+ int error;
+
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!test_opt(sbi, POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ acl = f2fs_get_acl(dentry->d_inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (!acl)
+ return -ENODATA;
+ error = posix_acl_to_xattr(acl, buffer, size);
+ posix_acl_release(acl);
+
+ return error;
+}
+
+static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ struct inode *inode = dentry->d_inode;
+ struct posix_acl *acl = NULL;
+ int error;
+
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!test_opt(sbi, POSIX_ACL))
+ return -EOPNOTSUPP;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_valid(acl);
+ if (error)
+ goto release_and_out;
+ }
+ } else {
+ acl = NULL;
+ }
+
+ error = f2fs_set_acl(inode, type, acl, NULL);
+
+release_and_out:
+ posix_acl_release(acl);
+ return error;
+}
+
+const struct xattr_handler f2fs_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
+ .list = f2fs_xattr_list_acl,
+ .get = f2fs_xattr_get_acl,
+ .set = f2fs_xattr_set_acl,
+};
+
+const struct xattr_handler f2fs_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+ .list = f2fs_xattr_list_acl,
+ .get = f2fs_xattr_get_acl,
+ .set = f2fs_xattr_set_acl,
+};
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
new file mode 100644
index 0000000..b4ba686
--- /dev/null
+++ b/fs/f2fs/acl.h
@@ -0,0 +1,59 @@
+/*
+ * fs/f2fs/acl.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/acl.h
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __F2FS_ACL_H__
+#define __F2FS_ACL_H__
+
+#include <linux/posix_acl_xattr.h>
+
+#define F2FS_ACL_VERSION 0x0001
+
+struct f2fs_acl_entry {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+};
+
+struct f2fs_acl_entry_short {
+ __le16 e_tag;
+ __le16 e_perm;
+};
+
+struct f2fs_acl_header {
+ __le32 a_version;
+};
+
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+
+extern struct posix_acl *f2fs_get_acl(struct inode *, int);
+extern int f2fs_acl_chmod(struct inode *);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
+ struct page *);
+#else
+#define f2fs_check_acl NULL
+#define f2fs_get_acl NULL
+#define f2fs_set_acl NULL
+
+static inline int f2fs_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+
+static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
+ struct page *ipage, struct page *dpage)
+{
+ return 0;
+}
+#endif
+#endif /* __F2FS_ACL_H__ */
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
new file mode 100644
index 0000000..ae2ab5f
--- /dev/null
+++ b/fs/f2fs/checkpoint.c
@@ -0,0 +1,1136 @@
+/*
+ * fs/f2fs/checkpoint.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/f2fs_fs.h>
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "trace.h"
+#include <trace/events/f2fs.h>
+
+static struct kmem_cache *ino_entry_slab;
+struct kmem_cache *inode_entry_slab;
+
+/*
+ * We guarantee no failure on the returned page.
+ */
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct address_space *mapping = META_MAPPING(sbi);
+ struct page *page = NULL;
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page) {
+ cond_resched();
+ goto repeat;
+ }
+ f2fs_wait_on_page_writeback(page, META);
+ SetPageUptodate(page);
+ return page;
+}
+
+/*
+ * We guarantee no failure on the returned page.
+ */
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct address_space *mapping = META_MAPPING(sbi);
+ struct page *page;
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO,
+ .blk_addr = index,
+ };
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page) {
+ cond_resched();
+ goto repeat;
+ }
+ if (PageUptodate(page))
+ goto out;
+
+ if (f2fs_submit_page_bio(sbi, page, &fio))
+ goto repeat;
+
+ lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+out:
+ mark_page_accessed(page);
+ return page;
+}
+
+static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ switch (type) {
+ case META_NAT:
+ break;
+ case META_SIT:
+ if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
+ return false;
+ break;
+ case META_SSA:
+ if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
+ blkaddr < SM_I(sbi)->ssa_blkaddr))
+ return false;
+ break;
+ case META_CP:
+ if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
+ blkaddr < __start_cp_addr(sbi)))
+ return false;
+ break;
+ case META_POR:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi)))
+ return false;
+ break;
+ default:
+ BUG();
+ }
+
+ return true;
+}
+
+/*
+ * Readahead CP/NAT/SIT/SSA pages
+ */
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
+{
+ block_t prev_blk_addr = 0;
+ struct page *page;
+ block_t blkno = start;
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO
+ };
+
+ for (; nrpages-- > 0; blkno++) {
+
+ if (!is_valid_blkaddr(sbi, blkno, type))
+ goto out;
+
+ switch (type) {
+ case META_NAT:
+ if (unlikely(blkno >=
+ NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
+ blkno = 0;
+ /* get nat block addr */
+ fio.blk_addr = current_nat_addr(sbi,
+ blkno * NAT_ENTRY_PER_BLOCK);
+ break;
+ case META_SIT:
+ /* get sit block addr */
+ fio.blk_addr = current_sit_addr(sbi,
+ blkno * SIT_ENTRY_PER_BLOCK);
+ if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
+ goto out;
+ prev_blk_addr = fio.blk_addr;
+ break;
+ case META_SSA:
+ case META_CP:
+ case META_POR:
+ fio.blk_addr = blkno;
+ break;
+ default:
+ BUG();
+ }
+
+ page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
+ if (!page)
+ continue;
+ if (PageUptodate(page)) {
+ f2fs_put_page(page, 1);
+ continue;
+ }
+
+ f2fs_submit_page_mbio(sbi, page, &fio);
+ f2fs_put_page(page, 0);
+ }
+out:
+ f2fs_submit_merged_bio(sbi, META, READ);
+ return blkno - start;
+}
+
+void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ bool readahead = false;
+
+ page = find_get_page(META_MAPPING(sbi), index);
+ if (!page || (page && !PageUptodate(page)))
+ readahead = true;
+ f2fs_put_page(page, 0);
+
+ if (readahead)
+ ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+}
+
+static int f2fs_write_meta_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+
+ trace_f2fs_writepage(page, META);
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+ if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
+ goto redirty_out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
+
+ f2fs_wait_on_page_writeback(page, META);
+ write_meta_page(sbi, page);
+ dec_page_count(sbi, F2FS_DIRTY_META);
+ unlock_page(page);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, META, WRITE);
+ return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
+}
+
+static int f2fs_write_meta_pages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ long diff, written;
+
+ trace_f2fs_writepages(mapping->host, wbc, META);
+
+ /* collect a number of dirty meta pages and write together */
+ if (wbc->for_kupdate ||
+ get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ goto skip_write;
+
+ /* if mounting is failed, skip writing node pages */
+ mutex_lock(&sbi->cp_mutex);
+ diff = nr_pages_to_write(sbi, META, wbc);
+ written = sync_meta_pages(sbi, META, wbc->nr_to_write);
+ mutex_unlock(&sbi->cp_mutex);
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
+ return 0;
+}
+
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+ long nr_to_write)
+{
+ struct address_space *mapping = META_MAPPING(sbi);
+ pgoff_t index = 0, end = LONG_MAX;
+ struct pagevec pvec;
+ long nwritten = 0;
+ struct writeback_control wbc = {
+ .for_reclaim = 0,
+ };
+
+ pagevec_init(&pvec, 0);
+
+ while (index <= end) {
+ int i, nr_pages;
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (unlikely(nr_pages == 0))
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ lock_page(page);
+
+ if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;
+
+ if (f2fs_write_meta_page(page, &wbc)) {
+ unlock_page(page);
+ break;
+ }
+ nwritten++;
+ if (unlikely(nwritten >= nr_to_write))
+ break;
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+
+ if (nwritten)
+ f2fs_submit_merged_bio(sbi, type, WRITE);
+
+ return nwritten;
+}
+
+static int f2fs_set_meta_page_dirty(struct page *page)
+{
+ trace_f2fs_set_page_dirty(page, META);
+
+ SetPageUptodate(page);
+ if (!PageDirty(page)) {
+ __set_page_dirty_nobuffers(page);
+ inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
+ SetPagePrivate(page);
+ f2fs_trace_pid(page);
+ return 1;
+ }
+ return 0;
+}
+
+const struct address_space_operations f2fs_meta_aops = {
+ .writepage = f2fs_write_meta_page,
+ .writepages = f2fs_write_meta_pages,
+ .set_page_dirty = f2fs_set_meta_page_dirty,
+ .invalidatepage = f2fs_invalidate_page,
+ .releasepage = f2fs_release_page,
+};
+
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct inode_management *im = &sbi->im[type];
+ struct ino_entry *e;
+retry:
+ if (radix_tree_preload(GFP_NOFS)) {
+ cond_resched();
+ goto retry;
+ }
+
+ spin_lock(&im->ino_lock);
+
+ e = radix_tree_lookup(&im->ino_root, ino);
+ if (!e) {
+ e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
+ if (!e) {
+ spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
+ goto retry;
+ }
+ if (radix_tree_insert(&im->ino_root, ino, e)) {
+ spin_unlock(&im->ino_lock);
+ kmem_cache_free(ino_entry_slab, e);
+ radix_tree_preload_end();
+ goto retry;
+ }
+ memset(e, 0, sizeof(struct ino_entry));
+ e->ino = ino;
+
+ list_add_tail(&e->list, &im->ino_list);
+ if (type != ORPHAN_INO)
+ im->ino_num++;
+ }
+ spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
+}
+
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ struct inode_management *im = &sbi->im[type];
+ struct ino_entry *e;
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ if (e) {
+ list_del(&e->list);
+ radix_tree_delete(&im->ino_root, ino);
+ im->ino_num--;
+ spin_unlock(&im->ino_lock);
+ kmem_cache_free(ino_entry_slab, e);
+ return;
+ }
+ spin_unlock(&im->ino_lock);
+}
+
+void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* add new dirty ino entry into list */
+ __add_ino_entry(sbi, ino, type);
+}
+
+void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* remove dirty ino entry from list */
+ __remove_ino_entry(sbi, ino, type);
+}
+
+/* mode should be APPEND_INO or UPDATE_INO */
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+{
+ struct inode_management *im = &sbi->im[mode];
+ struct ino_entry *e;
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ spin_unlock(&im->ino_lock);
+ return e ? true : false;
+}
+
+void release_dirty_inode(struct f2fs_sb_info *sbi)
+{
+ struct ino_entry *e, *tmp;
+ int i;
+
+ for (i = APPEND_INO; i <= UPDATE_INO; i++) {
+ struct inode_management *im = &sbi->im[i];
+
+ spin_lock(&im->ino_lock);
+ list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
+ list_del(&e->list);
+ radix_tree_delete(&im->ino_root, e->ino);
+ kmem_cache_free(ino_entry_slab, e);
+ im->ino_num--;
+ }
+ spin_unlock(&im->ino_lock);
+ }
+}
+
+int acquire_orphan_inode(struct f2fs_sb_info *sbi)
+{
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+ int err = 0;
+
+ spin_lock(&im->ino_lock);
+ if (unlikely(im->ino_num >= sbi->max_orphans))
+ err = -ENOSPC;
+ else
+ im->ino_num++;
+ spin_unlock(&im->ino_lock);
+
+ return err;
+}
+
+void release_orphan_inode(struct f2fs_sb_info *sbi)
+{
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+ spin_lock(&im->ino_lock);
+ f2fs_bug_on(sbi, im->ino_num == 0);
+ im->ino_num--;
+ spin_unlock(&im->ino_lock);
+}
+
+void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ /* add new orphan ino entry into list */
+ __add_ino_entry(sbi, ino, ORPHAN_INO);
+}
+
+void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ /* remove orphan entry from orphan list */
+ __remove_ino_entry(sbi, ino, ORPHAN_INO);
+}
+
+static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct inode *inode = f2fs_iget(sbi->sb, ino);
+ f2fs_bug_on(sbi, IS_ERR(inode));
+ clear_nlink(inode);
+
+ /* truncate all the data during iput */
+ iput(inode);
+}
+
+void recover_orphan_inodes(struct f2fs_sb_info *sbi)
+{
+ block_t start_blk, orphan_blkaddr, i, j;
+
+ if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
+ return;
+
+ set_sbi_flag(sbi, SBI_POR_DOING);
+
+ start_blk = __start_cp_addr(sbi) + 1 +
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ orphan_blkaddr = __start_sum_addr(sbi) - 1;
+
+ ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
+
+ for (i = 0; i < orphan_blkaddr; i++) {
+ struct page *page = get_meta_page(sbi, start_blk + i);
+ struct f2fs_orphan_block *orphan_blk;
+
+ orphan_blk = (struct f2fs_orphan_block *)page_address(page);
+ for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
+ nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
+ recover_orphan_inode(sbi, ino);
+ }
+ f2fs_put_page(page, 1);
+ }
+ /* clear Orphan Flag */
+ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
+ clear_sbi_flag(sbi, SBI_POR_DOING);
+ return;
+}
+
+static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
+{
+ struct list_head *head;
+ struct f2fs_orphan_block *orphan_blk = NULL;
+ unsigned int nentries = 0;
+ unsigned short index;
+ unsigned short orphan_blocks;
+ struct page *page = NULL;
+ struct ino_entry *orphan = NULL;
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+ orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
+
+ for (index = 0; index < orphan_blocks; index++)
+ grab_meta_page(sbi, start_blk + index);
+
+ index = 1;
+ spin_lock(&im->ino_lock);
+ head = &im->ino_list;
+
+ /* loop for each orphan inode entry and write them in Jornal block */
+ list_for_each_entry(orphan, head, list) {
+ if (!page) {
+ page = find_get_page(META_MAPPING(sbi), start_blk++);
+ f2fs_bug_on(sbi, !page);
+ orphan_blk =
+ (struct f2fs_orphan_block *)page_address(page);
+ memset(orphan_blk, 0, sizeof(*orphan_blk));
+ f2fs_put_page(page, 0);
+ }
+
+ orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
+
+ if (nentries == F2FS_ORPHANS_PER_BLOCK) {
+ /*
+ * an orphan block is full of 1020 entries,
+ * then we need to flush current orphan blocks
+ * and bring another one in memory
+ */
+ orphan_blk->blk_addr = cpu_to_le16(index);
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+ orphan_blk->entry_count = cpu_to_le32(nentries);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ index++;
+ nentries = 0;
+ page = NULL;
+ }
+ }
+
+ if (page) {
+ orphan_blk->blk_addr = cpu_to_le16(index);
+ orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+ orphan_blk->entry_count = cpu_to_le32(nentries);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
+
+ spin_unlock(&im->ino_lock);
+}
+
+static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
+ block_t cp_addr, unsigned long long *version)
+{
+ struct page *cp_page_1, *cp_page_2 = NULL;
+ unsigned long blk_size = sbi->blocksize;
+ struct f2fs_checkpoint *cp_block;
+ unsigned long long cur_version = 0, pre_version = 0;
+ size_t crc_offset;
+ __u32 crc = 0;
+
+ /* Read the 1st cp block in this CP pack */
+ cp_page_1 = get_meta_page(sbi, cp_addr);
+
+ /* get the version number */
+ cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
+ crc_offset = le32_to_cpu(cp_block->checksum_offset);
+ if (crc_offset >= blk_size)
+ goto invalid_cp1;
+
+ crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
+ if (!f2fs_crc_valid(crc, cp_block, crc_offset))
+ goto invalid_cp1;
+
+ pre_version = cur_cp_version(cp_block);
+
+ /* Read the 2nd cp block in this CP pack */
+ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+ cp_page_2 = get_meta_page(sbi, cp_addr);
+
+ cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
+ crc_offset = le32_to_cpu(cp_block->checksum_offset);
+ if (crc_offset >= blk_size)
+ goto invalid_cp2;
+
+ crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
+ if (!f2fs_crc_valid(crc, cp_block, crc_offset))
+ goto invalid_cp2;
+
+ cur_version = cur_cp_version(cp_block);
+
+ if (cur_version == pre_version) {
+ *version = cur_version;
+ f2fs_put_page(cp_page_2, 1);
+ return cp_page_1;
+ }
+invalid_cp2:
+ f2fs_put_page(cp_page_2, 1);
+invalid_cp1:
+ f2fs_put_page(cp_page_1, 1);
+ return NULL;
+}
+
+int get_valid_checkpoint(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_checkpoint *cp_block;
+ struct f2fs_super_block *fsb = sbi->raw_super;
+ struct page *cp1, *cp2, *cur_page;
+ unsigned long blk_size = sbi->blocksize;
+ unsigned long long cp1_version = 0, cp2_version = 0;
+ unsigned long long cp_start_blk_no;
+ unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ block_t cp_blk_no;
+ int i;
+
+ sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
+ if (!sbi->ckpt)
+ return -ENOMEM;
+ /*
+ * Finding out valid cp block involves read both
+ * sets( cp pack1 and cp pack 2)
+ */
+ cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+ cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
+
+ /* The second checkpoint pack should start at the next segment */
+ cp_start_blk_no += ((unsigned long long)1) <<
+ le32_to_cpu(fsb->log_blocks_per_seg);
+ cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
+
+ if (cp1 && cp2) {
+ if (ver_after(cp2_version, cp1_version))
+ cur_page = cp2;
+ else
+ cur_page = cp1;
+ } else if (cp1) {
+ cur_page = cp1;
+ } else if (cp2) {
+ cur_page = cp2;
+ } else {
+ goto fail_no_cp;
+ }
+
+ cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
+ memcpy(sbi->ckpt, cp_block, blk_size);
+
+ if (cp_blks <= 1)
+ goto done;
+
+ cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+ if (cur_page == cp2)
+ cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+ for (i = 1; i < cp_blks; i++) {
+ void *sit_bitmap_ptr;
+ unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+ cur_page = get_meta_page(sbi, cp_blk_no + i);
+ sit_bitmap_ptr = page_address(cur_page);
+ memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+ f2fs_put_page(cur_page, 1);
+ }
+done:
+ f2fs_put_page(cp1, 1);
+ f2fs_put_page(cp2, 1);
+ return 0;
+
+fail_no_cp:
+ kfree(sbi->ckpt);
+ return -EINVAL;
+}
+
+static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
+ return -EEXIST;
+
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ F2FS_I(inode)->dirty_dir = new;
+ list_add_tail(&new->list, &sbi->dir_inode_list);
+ stat_inc_dirty_dir(sbi);
+ return 0;
+}
+
+void update_dirty_page(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct inode_entry *new;
+ int ret = 0;
+
+ if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
+ return;
+
+ if (!S_ISDIR(inode->i_mode)) {
+ inode_inc_dirty_pages(inode);
+ goto out;
+ }
+
+ new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ new->inode = inode;
+ INIT_LIST_HEAD(&new->list);
+
+ spin_lock(&sbi->dir_inode_lock);
+ ret = __add_dirty_inode(inode, new);
+ inode_inc_dirty_pages(inode);
+ spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
+out:
+ SetPagePrivate(page);
+ f2fs_trace_pid(page);
+}
+
+void add_dirty_dir_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct inode_entry *new =
+ f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ int ret = 0;
+
+ new->inode = inode;
+ INIT_LIST_HEAD(&new->list);
+
+ spin_lock(&sbi->dir_inode_lock);
+ ret = __add_dirty_inode(inode, new);
+ spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
+}
+
+void remove_dirty_dir_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct inode_entry *entry;
+
+ if (!S_ISDIR(inode->i_mode))
+ return;
+
+ spin_lock(&sbi->dir_inode_lock);
+ if (get_dirty_pages(inode) ||
+ !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
+ spin_unlock(&sbi->dir_inode_lock);
+ return;
+ }
+
+ entry = F2FS_I(inode)->dirty_dir;
+ list_del(&entry->list);
+ F2FS_I(inode)->dirty_dir = NULL;
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ stat_dec_dirty_dir(sbi);
+ spin_unlock(&sbi->dir_inode_lock);
+ kmem_cache_free(inode_entry_slab, entry);
+
+ /* Only from the recovery routine */
+ if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
+ clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
+ iput(inode);
+ }
+}
+
+void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
+{
+ struct list_head *head;
+ struct inode_entry *entry;
+ struct inode *inode;
+retry:
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
+ spin_lock(&sbi->dir_inode_lock);
+
+ head = &sbi->dir_inode_list;
+ if (list_empty(head)) {
+ spin_unlock(&sbi->dir_inode_lock);
+ return;
+ }
+ entry = list_entry(head->next, struct inode_entry, list);
+ inode = igrab(entry->inode);
+ spin_unlock(&sbi->dir_inode_lock);
+ if (inode) {
+ filemap_fdatawrite(inode->i_mapping);
+ iput(inode);
+ } else {
+ /*
+ * We should submit bio, since it exists several
+ * wribacking dentry pages in the freeing inode.
+ */
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ }
+ goto retry;
+}
+
+/*
+ * Freeze all the FS-operations for checkpoint.
+ */
+static int block_operations(struct f2fs_sb_info *sbi)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .for_reclaim = 0,
+ };
+ struct blk_plug plug;
+ int err = 0;
+
+ blk_start_plug(&plug);
+
+retry_flush_dents:
+ f2fs_lock_all(sbi);
+ /* write all the dirty dentry pages */
+ if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
+ f2fs_unlock_all(sbi);
+ sync_dirty_dir_inodes(sbi);
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto out;
+ }
+ goto retry_flush_dents;
+ }
+
+ /*
+ * POR: we should ensure that there are no dirty node pages
+ * until finishing nat/sit flush.
+ */
+retry_flush_nodes:
+ down_write(&sbi->node_write);
+
+ if (get_pages(sbi, F2FS_DIRTY_NODES)) {
+ up_write(&sbi->node_write);
+ sync_node_pages(sbi, 0, &wbc);
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_unlock_all(sbi);
+ err = -EIO;
+ goto out;
+ }
+ goto retry_flush_nodes;
+ }
+out:
+ blk_finish_plug(&plug);
+ return err;
+}
+
+static void unblock_operations(struct f2fs_sb_info *sbi)
+{
+ up_write(&sbi->node_write);
+ f2fs_unlock_all(sbi);
+}
+
+static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+{
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+ if (!get_pages(sbi, F2FS_WRITEBACK))
+ break;
+
+ io_schedule();
+ }
+ finish_wait(&sbi->cp_wait, &wait);
+}
+
+static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
+ nid_t last_nid = nm_i->next_scan_nid;
+ block_t start_blk;
+ struct page *cp_page;
+ unsigned int data_sum_blocks, orphan_blocks;
+ __u32 crc32 = 0;
+ void *kaddr;
+ int i;
+ int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+
+ /*
+ * This avoids to conduct wrong roll-forward operations and uses
+ * metapages, so should be called prior to sync_meta_pages below.
+ */
+ discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META)) {
+ sync_meta_pages(sbi, META, LONG_MAX);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+ }
+
+ next_free_nid(sbi, &last_nid);
+
+ /*
+ * modify checkpoint
+ * version number is already updated
+ */
+ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
+ ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
+ ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
+ for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+ ckpt->cur_node_segno[i] =
+ cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
+ ckpt->cur_node_blkoff[i] =
+ cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
+ ckpt->alloc_type[i + CURSEG_HOT_NODE] =
+ curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
+ }
+ for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
+ ckpt->cur_data_segno[i] =
+ cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
+ ckpt->cur_data_blkoff[i] =
+ cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
+ ckpt->alloc_type[i + CURSEG_HOT_DATA] =
+ curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
+ }
+
+ ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
+ ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
+ ckpt->next_free_nid = cpu_to_le32(last_nid);
+
+ /* 2 cp + n data seg summary + orphan inode blocks */
+ data_sum_blocks = npages_for_summary_flush(sbi, false);
+ if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
+ set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+ else
+ clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+
+ orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
+ ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+ orphan_blocks);
+
+ if (__remain_node_summaries(cpc->reason))
+ ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks + NR_CURSEG_NODE_TYPE);
+ else
+ ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks);
+
+ if (cpc->reason == CP_UMOUNT)
+ set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+ else
+ clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+
+ if (cpc->reason == CP_FASTBOOT)
+ set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+ else
+ clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+
+ if (orphan_num)
+ set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+ else
+ clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+
+ /* update SIT/NAT bitmap */
+ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
+ get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
+
+ crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
+ *((__le32 *)((unsigned char *)ckpt +
+ le32_to_cpu(ckpt->checksum_offset)))
+ = cpu_to_le32(crc32);
+
+ start_blk = __start_cp_addr(sbi);
+
+ /* write out checkpoint buffer at block 0 */
+ cp_page = grab_meta_page(sbi, start_blk++);
+ kaddr = page_address(cp_page);
+ memcpy(kaddr, ckpt, F2FS_BLKSIZE);
+ set_page_dirty(cp_page);
+ f2fs_put_page(cp_page, 1);
+
+ for (i = 1; i < 1 + cp_payload_blks; i++) {
+ cp_page = grab_meta_page(sbi, start_blk++);
+ kaddr = page_address(cp_page);
+ memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE);
+ set_page_dirty(cp_page);
+ f2fs_put_page(cp_page, 1);
+ }
+
+ if (orphan_num) {
+ write_orphan_inodes(sbi, start_blk);
+ start_blk += orphan_blocks;
+ }
+
+ write_data_summaries(sbi, start_blk);
+ start_blk += data_sum_blocks;
+ if (__remain_node_summaries(cpc->reason)) {
+ write_node_summaries(sbi, start_blk);
+ start_blk += NR_CURSEG_NODE_TYPE;
+ }
+
+ /* writeout checkpoint block */
+ cp_page = grab_meta_page(sbi, start_blk);
+ kaddr = page_address(cp_page);
+ memcpy(kaddr, ckpt, F2FS_BLKSIZE);
+ set_page_dirty(cp_page);
+ f2fs_put_page(cp_page, 1);
+
+ /* wait for previous submitted node/meta pages writeback */
+ wait_on_all_pages_writeback(sbi);
+
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
+ filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
+ filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
+
+ /* update user_block_counts */
+ sbi->last_valid_block_count = sbi->total_valid_block_count;
+ sbi->alloc_valid_block_count = 0;
+
+ /* Here, we only have one bio having CP pack */
+ sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
+
+ /* wait for previous submitted meta pages writeback */
+ wait_on_all_pages_writeback(sbi);
+
+ release_dirty_inode(sbi);
+
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
+ clear_prefree_segments(sbi);
+ clear_sbi_flag(sbi, SBI_IS_DIRTY);
+}
+
+/*
+ * We guarantee that this checkpoint procedure will not fail.
+ */
+void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned long long ckpt_ver;
+
+ trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
+
+ mutex_lock(&sbi->cp_mutex);
+
+ if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
+ cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT)
+ goto out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto out;
+ if (f2fs_readonly(sbi->sb))
+ goto out;
+ if (block_operations(sbi))
+ goto out;
+
+ trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
+
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_bio(sbi, META, WRITE);
+
+ /*
+ * update checkpoint pack index
+ * Increase the version number so that
+ * SIT entries and seg summaries are written at correct place
+ */
+ ckpt_ver = cur_cp_version(ckpt);
+ ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
+
+ /* write cached NAT/SIT entries to NAT/SIT area */
+ flush_nat_entries(sbi);
+ flush_sit_entries(sbi, cpc);
+
+ /* unlock all the fs_lock[] in do_checkpoint() */
+ do_checkpoint(sbi, cpc);
+
+ unblock_operations(sbi);
+ stat_inc_cp_count(sbi->stat_info);
+out:
+ mutex_unlock(&sbi->cp_mutex);
+ trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
+}
+
+void init_ino_entry_info(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ for (i = 0; i < MAX_INO_ENTRY; i++) {
+ struct inode_management *im = &sbi->im[i];
+
+ INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
+ spin_lock_init(&im->ino_lock);
+ INIT_LIST_HEAD(&im->ino_list);
+ im->ino_num = 0;
+ }
+
+ /*
+ * considering 512 blocks in a segment 8 blocks are needed for cp
+ * and log segment summaries. Remaining blocks are used to keep
+ * orphan entries with the limitation one reserved segment
+ * for cp pack we can have max 1020*504 orphan entries
+ */
+ sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
+ NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
+}
+
+int __init create_checkpoint_caches(void)
+{
+ ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
+ sizeof(struct ino_entry));
+ if (!ino_entry_slab)
+ return -ENOMEM;
+ inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
+ sizeof(struct inode_entry));
+ if (!inode_entry_slab) {
+ kmem_cache_destroy(ino_entry_slab);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void destroy_checkpoint_caches(void)
+{
+ kmem_cache_destroy(ino_entry_slab);
+ kmem_cache_destroy(inode_entry_slab);
+}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
new file mode 100644
index 0000000..7c507bf
--- /dev/null
+++ b/fs/f2fs/data.c
@@ -0,0 +1,1253 @@
+/*
+ * fs/f2fs/data.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/aio.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/prefetch.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "trace.h"
+#include <trace/events/f2fs.h>
+
+static void f2fs_read_end_io(struct bio *bio, int err)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ struct page *page = bvec->bv_page;
+
+ if (!err) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ unlock_page(page);
+ }
+ bio_put(bio);
+}
+
+static void f2fs_write_end_io(struct bio *bio, int err)
+{
+ struct f2fs_sb_info *sbi = bio->bi_private;
+ struct bio_vec *bvec;
+ int i;
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ struct page *page = bvec->bv_page;
+
+ if (unlikely(err)) {
+ set_page_dirty(page);
+ set_bit(AS_EIO, &page->mapping->flags);
+ f2fs_stop_checkpoint(sbi);
+ }
+ end_page_writeback(page);
+ dec_page_count(sbi, F2FS_WRITEBACK);
+ }
+
+ if (!get_pages(sbi, F2FS_WRITEBACK) &&
+ !list_empty(&sbi->cp_wait.task_list))
+ wake_up(&sbi->cp_wait);
+
+ bio_put(bio);
+}
+
+/*
+ * Low-level block read/write IO operations.
+ */
+static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
+ int npages, bool is_read)
+{
+ struct bio *bio;
+
+ /* No failure on bio allocation */
+ bio = bio_alloc(GFP_NOIO, npages);
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ bio->bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+ bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
+ bio->bi_private = sbi;
+
+ return bio;
+}
+
+static void __submit_merged_bio(struct f2fs_bio_info *io)
+{
+ struct f2fs_io_info *fio = &io->fio;
+
+ if (!io->bio)
+ return;
+
+ if (is_read_io(fio->rw))
+ trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
+ else
+ trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
+
+ submit_bio(fio->rw, io->bio);
+ io->bio = NULL;
+}
+
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+ enum page_type type, int rw)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io;
+
+ io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+
+ down_write(&io->io_rwsem);
+
+ /* change META to META_FLUSH in the checkpoint procedure */
+ if (type >= META_FLUSH) {
+ io->fio.type = META_FLUSH;
+ if (test_opt(sbi, NOBARRIER))
+ io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
+ else
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ }
+ __submit_merged_bio(io);
+ up_write(&io->io_rwsem);
+}
+
+/*
+ * Fill the locked page with data located in the block address.
+ * Return unlocked page.
+ */
+int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
+ struct f2fs_io_info *fio)
+{
+ struct bio *bio;
+
+ trace_f2fs_submit_page_bio(page, fio);
+ f2fs_trace_ios(page, fio, 0);
+
+ /* Allocate a new bio */
+ bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));
+
+ if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ bio_put(bio);
+ f2fs_put_page(page, 1);
+ return -EFAULT;
+ }
+
+ submit_bio(fio->rw, bio);
+ return 0;
+}
+
+void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
+ struct f2fs_io_info *fio)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
+ struct f2fs_bio_info *io;
+ bool is_read = is_read_io(fio->rw);
+
+ io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+
+ verify_block_addr(sbi, fio->blk_addr);
+
+ down_write(&io->io_rwsem);
+
+ if (!is_read)
+ inc_page_count(sbi, F2FS_WRITEBACK);
+
+ if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
+ io->fio.rw != fio->rw))
+ __submit_merged_bio(io);
+alloc_new:
+ if (io->bio == NULL) {
+ int bio_blocks = MAX_BIO_BLOCKS(sbi);
+
+ io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
+ io->fio = *fio;
+ }
+
+ if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
+ PAGE_CACHE_SIZE) {
+ __submit_merged_bio(io);
+ goto alloc_new;
+ }
+
+ io->last_block_in_bio = fio->blk_addr;
+ f2fs_trace_ios(page, fio, 0);
+
+ up_write(&io->io_rwsem);
+ trace_f2fs_submit_page_mbio(page, fio);
+}
+
+/*
+ * Lock ordering for the change of data block address:
+ * ->data_page
+ * ->node_page
+ * update block addresses in the node page
+ */
+static void __set_data_blkaddr(struct dnode_of_data *dn)
+{
+ struct f2fs_node *rn;
+ __le32 *addr_array;
+ struct page *node_page = dn->node_page;
+ unsigned int ofs_in_node = dn->ofs_in_node;
+
+ f2fs_wait_on_page_writeback(node_page, NODE);
+
+ rn = F2FS_NODE(node_page);
+
+ /* Get physical address of data block */
+ addr_array = blkaddr_in_node(rn);
+ addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
+ set_page_dirty(node_page);
+}
+
+int reserve_new_block(struct dnode_of_data *dn)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+ return -EPERM;
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+ return -ENOSPC;
+
+ trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
+
+ dn->data_blkaddr = NEW_ADDR;
+ __set_data_blkaddr(dn);
+ mark_inode_dirty(dn->inode);
+ sync_inode_page(dn);
+ return 0;
+}
+
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
+{
+ bool need_put = dn->inode_page ? false : true;
+ int err;
+
+ err = get_dnode_of_data(dn, index, ALLOC_NODE);
+ if (err)
+ return err;
+
+ if (dn->data_blkaddr == NULL_ADDR)
+ err = reserve_new_block(dn);
+ if (err || need_put)
+ f2fs_put_dnode(dn);
+ return err;
+}
+
+static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct buffer_head *bh_result)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ pgoff_t start_fofs, end_fofs;
+ block_t start_blkaddr;
+
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return 0;
+
+ read_lock(&fi->ext.ext_lock);
+ if (fi->ext.len == 0) {
+ read_unlock(&fi->ext.ext_lock);
+ return 0;
+ }
+
+ stat_inc_total_hit(inode->i_sb);
+
+ start_fofs = fi->ext.fofs;
+ end_fofs = fi->ext.fofs + fi->ext.len - 1;
+ start_blkaddr = fi->ext.blk_addr;
+
+ if (pgofs >= start_fofs && pgofs <= end_fofs) {
+ unsigned int blkbits = inode->i_sb->s_blocksize_bits;
+ size_t count;
+
+ set_buffer_new(bh_result);
+ map_bh(bh_result, inode->i_sb,
+ start_blkaddr + pgofs - start_fofs);
+ count = end_fofs - pgofs + 1;
+ if (count < (UINT_MAX >> blkbits))
+ bh_result->b_size = (count << blkbits);
+ else
+ bh_result->b_size = UINT_MAX;
+
+ stat_inc_read_hit(inode->i_sb);
+ read_unlock(&fi->ext.ext_lock);
+ return 1;
+ }
+ read_unlock(&fi->ext.ext_lock);
+ return 0;
+}
+
+void update_extent_cache(struct dnode_of_data *dn)
+{
+ struct f2fs_inode_info *fi = F2FS_I(dn->inode);
+ pgoff_t fofs, start_fofs, end_fofs;
+ block_t start_blkaddr, end_blkaddr;
+ int need_update = true;
+
+ f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
+
+ /* Update the page address in the parent node */
+ __set_data_blkaddr(dn);
+
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return;
+
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ dn->ofs_in_node;
+
+ write_lock(&fi->ext.ext_lock);
+
+ start_fofs = fi->ext.fofs;
+ end_fofs = fi->ext.fofs + fi->ext.len - 1;
+ start_blkaddr = fi->ext.blk_addr;
+ end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
+
+ /* Drop and initialize the matched extent */
+ if (fi->ext.len == 1 && fofs == start_fofs)
+ fi->ext.len = 0;
+
+ /* Initial extent */
+ if (fi->ext.len == 0) {
+ if (dn->data_blkaddr != NULL_ADDR) {
+ fi->ext.fofs = fofs;
+ fi->ext.blk_addr = dn->data_blkaddr;
+ fi->ext.len = 1;
+ }
+ goto end_update;
+ }
+
+ /* Front merge */
+ if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) {
+ fi->ext.fofs--;
+ fi->ext.blk_addr--;
+ fi->ext.len++;
+ goto end_update;
+ }
+
+ /* Back merge */
+ if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) {
+ fi->ext.len++;
+ goto end_update;
+ }
+
+ /* Split the existing extent */
+ if (fi->ext.len > 1 &&
+ fofs >= start_fofs && fofs <= end_fofs) {
+ if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
+ fi->ext.len = fofs - start_fofs;
+ } else {
+ fi->ext.fofs = fofs + 1;
+ fi->ext.blk_addr = start_blkaddr +
+ fofs - start_fofs + 1;
+ fi->ext.len -= fofs - start_fofs + 1;
+ }
+ } else {
+ need_update = false;
+ }
+
+ /* Finally, if the extent is very fragmented, let's drop the cache. */
+ if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
+ fi->ext.len = 0;
+ set_inode_flag(fi, FI_NO_EXTENT);
+ need_update = true;
+ }
+end_update:
+ write_unlock(&fi->ext.ext_lock);
+ if (need_update)
+ sync_inode_page(dn);
+ return;
+}
+
+struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ int err;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = sync ? READ_SYNC : READA,
+ };
+
+ page = find_get_page(mapping, index);
+ if (page && PageUptodate(page))
+ return page;
+ f2fs_put_page(page, 0);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err)
+ return ERR_PTR(err);
+ f2fs_put_dnode(&dn);
+
+ if (dn.data_blkaddr == NULL_ADDR)
+ return ERR_PTR(-ENOENT);
+
+ /* By fallocate(), there is no cached page, but with NEW_ADDR */
+ if (unlikely(dn.data_blkaddr == NEW_ADDR))
+ return ERR_PTR(-EINVAL);
+
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ return page;
+ }
+
+ fio.blk_addr = dn.data_blkaddr;
+ err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
+ if (err)
+ return ERR_PTR(err);
+
+ if (sync) {
+ wait_on_page_locked(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 0);
+ return ERR_PTR(-EIO);
+ }
+ }
+ return page;
+}
+
+/*
+ * If it tries to access a hole, return an error.
+ * Because, the callers, functions in dir.c and GC, should be able to know
+ * whether this page exists or not.
+ */
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ int err;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = READ_SYNC,
+ };
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(err);
+ }
+ f2fs_put_dnode(&dn);
+
+ if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-ENOENT);
+ }
+
+ if (PageUptodate(page))
+ return page;
+
+ /*
+ * A new dentry page is allocated but not able to be written, since its
+ * new inode page couldn't be allocated due to -ENOSPC.
+ * In such the case, its blkaddr can be remained as NEW_ADDR.
+ * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
+ */
+ if (dn.data_blkaddr == NEW_ADDR) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ return page;
+ }
+
+ fio.blk_addr = dn.data_blkaddr;
+ err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
+ if (err)
+ return ERR_PTR(err);
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+ return page;
+}
+
+/*
+ * Caller ensures that this data page is never allocated.
+ * A new zero-filled data page is allocated in the page cache.
+ *
+ * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ * Note that, ipage is set only by make_empty_dir.
+ */
+struct page *get_new_data_page(struct inode *inode,
+ struct page *ipage, pgoff_t index, bool new_i_size)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+ struct dnode_of_data dn;
+ int err;
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, index);
+ if (err)
+ return ERR_PTR(err);
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page) {
+ err = -ENOMEM;
+ goto put_err;
+ }
+
+ if (PageUptodate(page))
+ return page;
+
+ if (dn.data_blkaddr == NEW_ADDR) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ } else {
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = READ_SYNC,
+ .blk_addr = dn.data_blkaddr,
+ };
+ err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
+ if (err)
+ goto put_err;
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ err = -EIO;
+ goto put_err;
+ }
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+ }
+
+ if (new_i_size &&
+ i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
+ i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
+ /* Only the directory inode sets new_i_size */
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
+ }
+ return page;
+
+put_err:
+ f2fs_put_dnode(&dn);
+ return ERR_PTR(err);
+}
+
+static int __allocate_data_block(struct dnode_of_data *dn)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct f2fs_inode_info *fi = F2FS_I(dn->inode);
+ struct f2fs_summary sum;
+ struct node_info ni;
+ int seg = CURSEG_WARM_DATA;
+ pgoff_t fofs;
+
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+ return -EPERM;
+ if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+ return -ENOSPC;
+
+ get_node_info(sbi, dn->nid, &ni);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+
+ if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
+ seg = CURSEG_DIRECT_IO;
+
+ allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg);
+
+ /* direct IO doesn't use extent cache to maximize the performance */
+ __set_data_blkaddr(dn);
+
+ /* update i_size */
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ dn->ofs_in_node;
+ if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
+ i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
+
+ return 0;
+}
+
+static void __allocate_data_blocks(struct inode *inode, loff_t offset,
+ size_t count)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct dnode_of_data dn;
+ u64 start = F2FS_BYTES_TO_BLK(offset);
+ u64 len = F2FS_BYTES_TO_BLK(count);
+ bool allocated;
+ u64 end_offset;
+
+ while (len) {
+ f2fs_balance_fs(sbi);
+ f2fs_lock_op(sbi);
+
+ /* When reading holes, we need its node page */
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ if (get_dnode_of_data(&dn, start, ALLOC_NODE))
+ goto out;
+
+ allocated = false;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+
+ while (dn.ofs_in_node < end_offset && len) {
+ if (dn.data_blkaddr == NULL_ADDR) {
+ if (__allocate_data_block(&dn))
+ goto sync_out;
+ allocated = true;
+ }
+ len--;
+ start++;
+ dn.ofs_in_node++;
+ }
+
+ if (allocated)
+ sync_inode_page(&dn);
+
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
+ }
+ return;
+
+sync_out:
+ if (allocated)
+ sync_inode_page(&dn);
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_unlock_op(sbi);
+ return;
+}
+
+/*
+ * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
+ * If original data blocks are allocated, then give them to blockdev.
+ * Otherwise,
+ * a. preallocate requested block addresses
+ * b. do not use extent cache for better performance
+ * c. give the block addresses to blockdev
+ */
+static int __get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create, bool fiemap)
+{
+ unsigned int blkbits = inode->i_sb->s_blocksize_bits;
+ unsigned maxblocks = bh_result->b_size >> blkbits;
+ struct dnode_of_data dn;
+ int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+ pgoff_t pgofs, end_offset;
+ int err = 0, ofs = 1;
+ bool allocated = false;
+
+ /* Get the page offset from the block offset(iblock) */
+ pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
+
+ if (check_extent_cache(inode, pgofs, bh_result))
+ goto out;
+
+ if (create)
+ f2fs_lock_op(F2FS_I_SB(inode));
+
+ /* When reading holes, we need its node page */
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, mode);
+ if (err) {
+ if (err == -ENOENT)
+ err = 0;
+ goto unlock_out;
+ }
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+ goto put_out;
+
+ if (dn.data_blkaddr != NULL_ADDR) {
+ set_buffer_new(bh_result);
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ } else if (create) {
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto put_out;
+ allocated = true;
+ set_buffer_new(bh_result);
+ map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ } else {
+ goto put_out;
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ bh_result->b_size = (((size_t)1) << blkbits);
+ dn.ofs_in_node++;
+ pgofs++;
+
+get_next:
+ if (dn.ofs_in_node >= end_offset) {
+ if (allocated)
+ sync_inode_page(&dn);
+ allocated = false;
+ f2fs_put_dnode(&dn);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, mode);
+ if (err) {
+ if (err == -ENOENT)
+ err = 0;
+ goto unlock_out;
+ }
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+ goto put_out;
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ }
+
+ if (maxblocks > (bh_result->b_size >> blkbits)) {
+ block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ if (blkaddr == NULL_ADDR && create) {
+ err = __allocate_data_block(&dn);
+ if (err)
+ goto sync_out;
+ allocated = true;
+ blkaddr = dn.data_blkaddr;
+ }
+ /* Give more consecutive addresses for the readahead */
+ if (blkaddr == (bh_result->b_blocknr + ofs)) {
+ ofs++;
+ dn.ofs_in_node++;
+ pgofs++;
+ bh_result->b_size += (((size_t)1) << blkbits);
+ goto get_next;
+ }
+ }
+sync_out:
+ if (allocated)
+ sync_inode_page(&dn);
+put_out:
+ f2fs_put_dnode(&dn);
+unlock_out:
+ if (create)
+ f2fs_unlock_op(F2FS_I_SB(inode));
+out:
+ trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+ return err;
+}
+
+static int get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, false);
+}
+
+static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, true);
+}
+
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ return generic_block_fiemap(inode, fieinfo,
+ start, len, get_data_block_fiemap);
+}
+
+static int f2fs_read_data_page(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ int ret = -EAGAIN;
+
+ trace_f2fs_readpage(page, DATA);
+
+ /* If the file has inline data, try to read it directly */
+ if (f2fs_has_inline_data(inode))
+ ret = f2fs_read_inline_data(inode, page);
+ if (ret == -EAGAIN)
+ ret = mpage_readpage(page, get_data_block);
+
+ return ret;
+}
+
+static int f2fs_read_data_pages(struct file *file,
+ struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct inode *inode = file->f_mapping->host;
+
+ /* If the file has inline data, skip readpages */
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
+ return mpage_readpages(mapping, pages, nr_pages, get_data_block);
+}
+
+int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
+{
+ struct inode *inode = page->mapping->host;
+ struct dnode_of_data dn;
+ int err = 0;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ if (err)
+ return err;
+
+ fio->blk_addr = dn.data_blkaddr;
+
+ /* This page is already truncated */
+ if (fio->blk_addr == NULL_ADDR)
+ goto out_writepage;
+
+ set_page_writeback(page);
+
+ /*
+ * If current allocation needs SSR,
+ * it had better in-place writes for updated data.
+ */
+ if (unlikely(fio->blk_addr != NEW_ADDR &&
+ !is_cold_data(page) &&
+ need_inplace_update(inode))) {
+ rewrite_data_page(page, fio);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
+ } else {
+ write_data_page(page, &dn, fio);
+ update_extent_cache(&dn);
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ }
+out_writepage:
+ f2fs_put_dnode(&dn);
+ return err;
+}
+
+static int f2fs_write_data_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = page->mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ loff_t i_size = i_size_read(inode);
+ const pgoff_t end_index = ((unsigned long long) i_size)
+ >> PAGE_CACHE_SHIFT;
+ unsigned offset = 0;
+ bool need_balance_fs = false;
+ int err = 0;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ };
+
+ trace_f2fs_writepage(page, DATA);
+
+ if (page->index < end_index)
+ goto write;
+
+ /*
+ * If the offset is out-of-range of file size,
+ * this page does not have to be written to disk.
+ */
+ offset = i_size & (PAGE_CACHE_SIZE - 1);
+ if ((page->index >= end_index + 1) || !offset)
+ goto out;
+
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+write:
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+ if (f2fs_is_drop_cache(inode))
+ goto out;
+ if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
+ available_free_memory(sbi, BASE_CHECK))
+ goto redirty_out;
+
+ /* Dentry blocks are controlled by checkpoint */
+ if (S_ISDIR(inode->i_mode)) {
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
+ err = do_write_data_page(page, &fio);
+ goto done;
+ }
+
+ /* we should bypass data pages to proceed the kworkder jobs */
+ if (unlikely(f2fs_cp_error(sbi))) {
+ SetPageError(page);
+ goto out;
+ }
+
+ if (!wbc->for_reclaim)
+ need_balance_fs = true;
+ else if (has_not_enough_free_secs(sbi, 0))
+ goto redirty_out;
+
+ err = -EAGAIN;
+ f2fs_lock_op(sbi);
+ if (f2fs_has_inline_data(inode))
+ err = f2fs_write_inline_data(inode, page);
+ if (err == -EAGAIN)
+ err = do_write_data_page(page, &fio);
+ f2fs_unlock_op(sbi);
+done:
+ if (err && err != -ENOENT)
+ goto redirty_out;
+
+ clear_cold_data(page);
+out:
+ inode_dec_dirty_pages(inode);
+ unlock_page(page);
+ if (need_balance_fs)
+ f2fs_balance_fs(sbi);
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
+}
+
+static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct address_space *mapping = data;
+ int ret = mapping->a_ops->writepage(page, wbc);
+ mapping_set_error(mapping, ret);
+ return ret;
+}
+
+static int f2fs_write_data_pages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ bool locked = false;
+ int ret;
+ long diff;
+
+ trace_f2fs_writepages(mapping->host, wbc, DATA);
+
+ /* deal with chardevs and other special file */
+ if (!mapping->a_ops->writepage)
+ return 0;
+
+ if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
+ available_free_memory(sbi, DIRTY_DENTS))
+ goto skip_write;
+
+ diff = nr_pages_to_write(sbi, DATA, wbc);
+
+ if (!S_ISDIR(inode->i_mode)) {
+ mutex_lock(&sbi->writepages);
+ locked = true;
+ }
+ ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+ if (locked)
+ mutex_unlock(&sbi->writepages);
+
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+
+ remove_dirty_dir_inode(inode);
+
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+ return ret;
+
+skip_write:
+ wbc->pages_skipped += get_dirty_pages(inode);
+ return 0;
+}
+
+static void f2fs_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, 0, inode->i_size);
+ truncate_blocks(inode, inode->i_size, true);
+ }
+}
+
+static int f2fs_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct page *page, *ipage;
+ pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
+ struct dnode_of_data dn;
+ int err = 0;
+
+ trace_f2fs_write_begin(inode, pos, len, flags);
+
+ f2fs_balance_fs(sbi);
+
+ /*
+ * We should check this at this moment to avoid deadlock on inode page
+ * and #0 page. The locking rule for inline_data conversion should be:
+ * lock_page(page #0) -> lock_page(inode_page)
+ */
+ if (index != 0) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ goto fail;
+ }
+repeat:
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ *pagep = page;
+
+ f2fs_lock_op(sbi);
+
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto unlock_fail;
+ }
+
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode)) {
+ if (pos + len <= MAX_INLINE_DATA) {
+ read_inline_data(page, ipage);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+ sync_inode_page(&dn);
+ goto put_next;
+ }
+ err = f2fs_convert_inline_page(&dn, page);
+ if (err)
+ goto put_fail;
+ }
+ err = f2fs_reserve_block(&dn, index);
+ if (err)
+ goto put_fail;
+put_next:
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
+
+ if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
+ return 0;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+ unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned end = start + len;
+
+ /* Reading beyond i_size is simple: memset to zero */
+ zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+ goto out;
+ }
+
+ if (dn.data_blkaddr == NEW_ADDR) {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ } else {
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = READ_SYNC,
+ .blk_addr = dn.data_blkaddr,
+ };
+ err = f2fs_submit_page_bio(sbi, page, &fio);
+ if (err)
+ goto fail;
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ err = -EIO;
+ goto fail;
+ }
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+ }
+out:
+ SetPageUptodate(page);
+ clear_cold_data(page);
+ return 0;
+
+put_fail:
+ f2fs_put_dnode(&dn);
+unlock_fail:
+ f2fs_unlock_op(sbi);
+ f2fs_put_page(page, 1);
+fail:
+ f2fs_write_failed(mapping, pos + len);
+ return err;
+}
+
+static int f2fs_write_end(struct file *file,
+ struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+
+ trace_f2fs_write_end(inode, pos, len, copied);
+
+ set_page_dirty(page);
+
+ if (pos + copied > i_size_read(inode)) {
+ i_size_write(inode, pos + copied);
+ mark_inode_dirty(inode);
+ update_inode_page(inode);
+ }
+
+ f2fs_put_page(page, 1);
+ return copied;
+}
+
+static int check_direct_IO(struct inode *inode, int rw,
+ const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+{
+ unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
+ int i;
+
+ if (rw == READ)
+ return 0;
+
+ if (offset & blocksize_mask)
+ return -EINVAL;
+
+ for (i = 0; i < nr_segs; i++)
+ if (iov[i].iov_len & blocksize_mask)
+ return -EINVAL;
+
+ return 0;
+}
+
+static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
+ const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ size_t count = iov_length(iov, nr_segs);
+ int err;
+
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
+ if (check_direct_IO(inode, rw, iov, offset, nr_segs))
+ return 0;
+
+ trace_f2fs_direct_IO_enter(inode, offset, count, rw);
+
+ if (rw & WRITE)
+ __allocate_data_blocks(inode, offset, count);
+
+ err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ get_data_block);
+ if (err < 0 && (rw & WRITE))
+ f2fs_write_failed(mapping, offset + count);
+
+ trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
+
+ return err;
+}
+
+void f2fs_invalidate_page(struct page *page, unsigned long offset)
+{
+ struct inode *inode = page->mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino >= F2FS_ROOT_INO(sbi) && (offset % PAGE_CACHE_SIZE))
+ return;
+
+ if (PageDirty(page)) {
+ if (inode->i_ino == F2FS_META_INO(sbi))
+ dec_page_count(sbi, F2FS_DIRTY_META);
+ else if (inode->i_ino == F2FS_NODE_INO(sbi))
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ else
+ inode_dec_dirty_pages(inode);
+ }
+ ClearPagePrivate(page);
+}
+
+int f2fs_release_page(struct page *page, gfp_t wait)
+{
+ /* If this is dirty page, keep PagePrivate */
+ if (PageDirty(page))
+ return 0;
+
+ ClearPagePrivate(page);
+ return 1;
+}
+
+static int f2fs_set_data_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+
+ trace_f2fs_set_page_dirty(page, DATA);
+
+ SetPageUptodate(page);
+
+ if (f2fs_is_atomic_file(inode)) {
+ register_inmem_page(inode, page);
+ return 1;
+ }
+
+ mark_inode_dirty(inode);
+
+ if (!PageDirty(page)) {
+ __set_page_dirty_nobuffers(page);
+ update_dirty_page(inode, page);
+ return 1;
+ }
+ return 0;
+}
+
+static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
+{
+ struct inode *inode = mapping->host;
+
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ int err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+ return generic_block_bmap(mapping, block, get_data_block);
+}
+
+const struct address_space_operations f2fs_dblock_aops = {
+ .readpage = f2fs_read_data_page,
+ .readpages = f2fs_read_data_pages,
+ .writepage = f2fs_write_data_page,
+ .writepages = f2fs_write_data_pages,
+ .write_begin = f2fs_write_begin,
+ .write_end = f2fs_write_end,
+ .set_page_dirty = f2fs_set_data_page_dirty,
+ .invalidatepage = f2fs_invalidate_page,
+ .releasepage = f2fs_release_page,
+ .direct_IO = f2fs_direct_IO,
+ .bmap = f2fs_bmap,
+};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
new file mode 100644
index 0000000..e671373
--- /dev/null
+++ b/fs/f2fs/debug.c
@@ -0,0 +1,401 @@
+/*
+ * f2fs debugging statistics
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ * Copyright (c) 2012 Linux Foundation
+ * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/backing-dev.h>
+#include <linux/f2fs_fs.h>
+#include <linux/blkdev.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "gc.h"
+
+static LIST_HEAD(f2fs_stat_list);
+static struct dentry *f2fs_debugfs_root;
+static DEFINE_MUTEX(f2fs_stat_mutex);
+
+static void update_general_status(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ int i;
+
+ /* validation check of the segment numbers */
+ si->hit_ext = sbi->read_hit_ext;
+ si->total_ext = sbi->total_hit_ext;
+ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
+ si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
+ si->ndirty_dirs = sbi->n_dirty_dirs;
+ si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+ si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
+ si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
+ si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
+ si->rsvd_segs = reserved_segments(sbi);
+ si->overp_segs = overprovision_segments(sbi);
+ si->valid_count = valid_user_blocks(sbi);
+ si->valid_node_count = valid_node_count(sbi);
+ si->valid_inode_count = valid_inode_count(sbi);
+ si->inline_inode = atomic_read(&sbi->inline_inode);
+ si->inline_dir = atomic_read(&sbi->inline_dir);
+ si->utilization = utilization(sbi);
+
+ si->free_segs = free_segments(sbi);
+ si->free_secs = free_sections(sbi);
+ si->prefree_count = prefree_segments(sbi);
+ si->dirty_count = dirty_segments(sbi);
+ si->node_pages = NODE_MAPPING(sbi)->nrpages;
+ si->meta_pages = META_MAPPING(sbi)->nrpages;
+ si->nats = NM_I(sbi)->nat_cnt;
+ si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
+ si->sits = MAIN_SEGS(sbi);
+ si->dirty_sits = SIT_I(sbi)->dirty_sentries;
+ si->fnids = NM_I(sbi)->fcnt;
+ si->bg_gc = sbi->bg_gc;
+ si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
+ * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
+ / 2;
+ si->util_valid = (int)(written_block_count(sbi) >>
+ sbi->log_blocks_per_seg)
+ * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
+ / 2;
+ si->util_invalid = 50 - si->util_free - si->util_valid;
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, i);
+ si->curseg[i] = curseg->segno;
+ si->cursec[i] = curseg->segno / sbi->segs_per_sec;
+ si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
+ }
+
+ for (i = 0; i < 2; i++) {
+ si->segment_count[i] = sbi->segment_count[i];
+ si->block_count[i] = sbi->block_count[i];
+ }
+
+ si->inplace_count = atomic_read(&sbi->inplace_count);
+}
+
+/*
+ * This function calculates BDF of every segments
+ */
+static void update_sit_info(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
+ unsigned int segno, vblocks;
+ int ndirty = 0;
+
+ bimodal = 0;
+ total_vblocks = 0;
+ blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
+ hblks_per_sec = blks_per_sec / 2;
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+ dist = abs(vblocks - hblks_per_sec);
+ bimodal += dist * dist;
+
+ if (vblocks > 0 && vblocks < blks_per_sec) {
+ total_vblocks += vblocks;
+ ndirty++;
+ }
+ }
+ dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
+ si->bimodal = bimodal / dist;
+ if (si->dirty_count)
+ si->avg_vblocks = total_vblocks / ndirty;
+ else
+ si->avg_vblocks = 0;
+}
+
+/*
+ * This function calculates memory footprint.
+ */
+static void update_mem_info(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ unsigned npages;
+ int i;
+
+ if (si->base_mem)
+ goto get_cache;
+
+ si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
+ si->base_mem += 2 * sizeof(struct f2fs_inode_info);
+ si->base_mem += sizeof(*sbi->ckpt);
+
+ /* build sm */
+ si->base_mem += sizeof(struct f2fs_sm_info);
+
+ /* build sit */
+ si->base_mem += sizeof(struct sit_info);
+ si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
+ si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
+ si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
+ si->base_mem += SIT_VBLOCK_MAP_SIZE;
+ if (sbi->segs_per_sec > 1)
+ si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
+ si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
+
+ /* build free segmap */
+ si->base_mem += sizeof(struct free_segmap_info);
+ si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
+ si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
+
+ /* build curseg */
+ si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
+ si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
+
+ /* build dirty segmap */
+ si->base_mem += sizeof(struct dirty_seglist_info);
+ si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi));
+ si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi));
+
+ /* build nm */
+ si->base_mem += sizeof(struct f2fs_nm_info);
+ si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+
+get_cache:
+ si->cache_mem = 0;
+
+ /* build gc */
+ if (sbi->gc_thread)
+ si->cache_mem += sizeof(struct f2fs_gc_kthread);
+
+ /* build merge flush thread */
+ if (SM_I(sbi)->cmd_control_info)
+ si->cache_mem += sizeof(struct flush_cmd_control);
+
+ /* free nids */
+ si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
+ si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
+ si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
+ sizeof(struct nat_entry_set);
+ si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
+ si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
+ for (i = 0; i <= UPDATE_INO; i++)
+ si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
+
+ si->page_mem = 0;
+ npages = NODE_MAPPING(sbi)->nrpages;
+ si->page_mem += npages << PAGE_CACHE_SHIFT;
+ npages = META_MAPPING(sbi)->nrpages;
+ si->page_mem += npages << PAGE_CACHE_SHIFT;
+}
+
+static int stat_show(struct seq_file *s, void *v)
+{
+ struct f2fs_stat_info *si;
+ int i = 0;
+ int j;
+
+ mutex_lock(&f2fs_stat_mutex);
+ list_for_each_entry(si, &f2fs_stat_list, stat_list) {
+ char devname[BDEVNAME_SIZE];
+
+ update_general_status(si->sbi);
+
+ seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n",
+ bdevname(si->sbi->sb->s_bdev, devname), i++);
+ seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
+ si->sit_area_segs, si->nat_area_segs);
+ seq_printf(s, "[SSA: %d] [MAIN: %d",
+ si->ssa_area_segs, si->main_area_segs);
+ seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
+ si->overp_segs, si->rsvd_segs);
+ seq_printf(s, "Utilization: %d%% (%d valid blocks)\n",
+ si->utilization, si->valid_count);
+ seq_printf(s, " - Node: %u (Inode: %u, ",
+ si->valid_node_count, si->valid_inode_count);
+ seq_printf(s, "Other: %u)\n - Data: %u\n",
+ si->valid_node_count - si->valid_inode_count,
+ si->valid_count - si->valid_node_count);
+ seq_printf(s, " - Inline_data Inode: %u\n",
+ si->inline_inode);
+ seq_printf(s, " - Inline_dentry Inode: %u\n",
+ si->inline_dir);
+ seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
+ si->main_area_segs, si->main_area_sections,
+ si->main_area_zones);
+ seq_printf(s, " - COLD data: %d, %d, %d\n",
+ si->curseg[CURSEG_COLD_DATA],
+ si->cursec[CURSEG_COLD_DATA],
+ si->curzone[CURSEG_COLD_DATA]);
+ seq_printf(s, " - WARM data: %d, %d, %d\n",
+ si->curseg[CURSEG_WARM_DATA],
+ si->cursec[CURSEG_WARM_DATA],
+ si->curzone[CURSEG_WARM_DATA]);
+ seq_printf(s, " - HOT data: %d, %d, %d\n",
+ si->curseg[CURSEG_HOT_DATA],
+ si->cursec[CURSEG_HOT_DATA],
+ si->curzone[CURSEG_HOT_DATA]);
+ seq_printf(s, " - Dir dnode: %d, %d, %d\n",
+ si->curseg[CURSEG_HOT_NODE],
+ si->cursec[CURSEG_HOT_NODE],
+ si->curzone[CURSEG_HOT_NODE]);
+ seq_printf(s, " - File dnode: %d, %d, %d\n",
+ si->curseg[CURSEG_WARM_NODE],
+ si->cursec[CURSEG_WARM_NODE],
+ si->curzone[CURSEG_WARM_NODE]);
+ seq_printf(s, " - Indir nodes: %d, %d, %d\n",
+ si->curseg[CURSEG_COLD_NODE],
+ si->cursec[CURSEG_COLD_NODE],
+ si->curzone[CURSEG_COLD_NODE]);
+ seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
+ si->main_area_segs - si->dirty_count -
+ si->prefree_count - si->free_segs,
+ si->dirty_count);
+ seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
+ si->prefree_count, si->free_segs, si->free_secs);
+ seq_printf(s, "CP calls: %d\n", si->cp_count);
+ seq_printf(s, "GC calls: %d (BG: %d)\n",
+ si->call_count, si->bg_gc);
+ seq_printf(s, " - data segments : %d\n", si->data_segs);
+ seq_printf(s, " - node segments : %d\n", si->node_segs);
+ seq_printf(s, "Try to move %d blocks\n", si->tot_blks);
+ seq_printf(s, " - data blocks : %d\n", si->data_blks);
+ seq_printf(s, " - node blocks : %d\n", si->node_blks);
+ seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
+ si->hit_ext, si->total_ext);
+ seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - inmem: %4d, wb: %4d\n",
+ si->inmem_pages, si->wb_pages);
+ seq_printf(s, " - nodes: %4d in %4d\n",
+ si->ndirty_node, si->node_pages);
+ seq_printf(s, " - dents: %4d in dirs:%4d\n",
+ si->ndirty_dent, si->ndirty_dirs);
+ seq_printf(s, " - meta: %4d in %4d\n",
+ si->ndirty_meta, si->meta_pages);
+ seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
+ si->dirty_nats, si->nats, si->dirty_sits, si->sits);
+ seq_printf(s, " - free_nids: %9d\n",
+ si->fnids);
+ seq_puts(s, "\nDistribution of User Blocks:");
+ seq_puts(s, " [ valid | invalid | free ]\n");
+ seq_puts(s, " [");
+
+ for (j = 0; j < si->util_valid; j++)
+ seq_putc(s, '-');
+ seq_putc(s, '|');
+
+ for (j = 0; j < si->util_invalid; j++)
+ seq_putc(s, '-');
+ seq_putc(s, '|');
+
+ for (j = 0; j < si->util_free; j++)
+ seq_putc(s, '-');
+ seq_puts(s, "]\n\n");
+ seq_printf(s, "IPU: %u blocks\n", si->inplace_count);
+ seq_printf(s, "SSR: %u blocks in %u segments\n",
+ si->block_count[SSR], si->segment_count[SSR]);
+ seq_printf(s, "LFS: %u blocks in %u segments\n",
+ si->block_count[LFS], si->segment_count[LFS]);
+
+ /* segment usage info */
+ update_sit_info(si->sbi);
+ seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n",
+ si->bimodal, si->avg_vblocks);
+
+ /* memory footprint */
+ update_mem_info(si->sbi);
+ seq_printf(s, "\nMemory: %u KB\n",
+ (si->base_mem + si->cache_mem + si->page_mem) >> 10);
+ seq_printf(s, " - static: %u KB\n",
+ si->base_mem >> 10);
+ seq_printf(s, " - cached: %u KB\n",
+ si->cache_mem >> 10);
+ seq_printf(s, " - paged : %u KB\n",
+ si->page_mem >> 10);
+ }
+ mutex_unlock(&f2fs_stat_mutex);
+ return 0;
+}
+
+static int stat_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, stat_show, inode->i_private);
+}
+
+static const struct file_operations stat_fops = {
+ .open = stat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int f2fs_build_stats(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ struct f2fs_stat_info *si;
+
+ si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
+ if (!si)
+ return -ENOMEM;
+
+ si->all_area_segs = le32_to_cpu(raw_super->segment_count);
+ si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
+ si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
+ si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa);
+ si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
+ si->main_area_sections = le32_to_cpu(raw_super->section_count);
+ si->main_area_zones = si->main_area_sections /
+ le32_to_cpu(raw_super->secs_per_zone);
+ si->sbi = sbi;
+ sbi->stat_info = si;
+
+ atomic_set(&sbi->inline_inode, 0);
+ atomic_set(&sbi->inline_dir, 0);
+ atomic_set(&sbi->inplace_count, 0);
+
+ mutex_lock(&f2fs_stat_mutex);
+ list_add_tail(&si->stat_list, &f2fs_stat_list);
+ mutex_unlock(&f2fs_stat_mutex);
+
+ return 0;
+}
+
+void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_stat_info *si = F2FS_STAT(sbi);
+
+ mutex_lock(&f2fs_stat_mutex);
+ list_del(&si->stat_list);
+ mutex_unlock(&f2fs_stat_mutex);
+
+ kfree(si);
+}
+
+void __init f2fs_create_root_stats(void)
+{
+ struct dentry *file;
+
+ f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
+ if (!f2fs_debugfs_root)
+ return;
+
+ file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
+ NULL, &stat_fops);
+ if (!file) {
+ debugfs_remove(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
+ }
+}
+
+void f2fs_destroy_root_stats(void)
+{
+ if (!f2fs_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(f2fs_debugfs_root);
+ f2fs_debugfs_root = NULL;
+}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
new file mode 100644
index 0000000..1e65ced
--- /dev/null
+++ b/fs/f2fs/dir.c
@@ -0,0 +1,805 @@
+/*
+ * fs/f2fs/dir.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "node.h"
+#include "acl.h"
+#include "xattr.h"
+
+static unsigned long dir_blocks(struct inode *inode)
+{
+ return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
+ >> PAGE_CACHE_SHIFT;
+}
+
+static unsigned int dir_buckets(unsigned int level, int dir_level)
+{
+ if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
+ return 1 << (level + dir_level);
+ else
+ return MAX_DIR_BUCKETS;
+}
+
+static unsigned int bucket_blocks(unsigned int level)
+{
+ if (level < MAX_DIR_HASH_DEPTH / 2)
+ return 2;
+ else
+ return 4;
+}
+
+unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+ [F2FS_FT_UNKNOWN] = DT_UNKNOWN,
+ [F2FS_FT_REG_FILE] = DT_REG,
+ [F2FS_FT_DIR] = DT_DIR,
+ [F2FS_FT_CHRDEV] = DT_CHR,
+ [F2FS_FT_BLKDEV] = DT_BLK,
+ [F2FS_FT_FIFO] = DT_FIFO,
+ [F2FS_FT_SOCK] = DT_SOCK,
+ [F2FS_FT_SYMLINK] = DT_LNK,
+};
+
+#define S_SHIFT 12
+static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
+ [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
+ [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
+ [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV,
+ [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV,
+ [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO,
+ [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK,
+ [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
+};
+
+void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
+}
+
+static unsigned long dir_block_index(unsigned int level,
+ int dir_level, unsigned int idx)
+{
+ unsigned long i;
+ unsigned long bidx = 0;
+
+ for (i = 0; i < level; i++)
+ bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
+ bidx += idx * bucket_blocks(level);
+ return bidx;
+}
+
+static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
+ struct f2fs_dir_entry *de)
+{
+ if (le16_to_cpu(de->name_len) != namelen)
+ return false;
+
+ if (de->hash_code != namehash)
+ return false;
+
+ return true;
+}
+
+static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
+ struct qstr *name, int *max_slots,
+ struct page **res_page)
+{
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_ptr d;
+
+ dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ de = find_target_dentry(name, max_slots, &d);
+
+ if (de)
+ *res_page = dentry_page;
+ else
+ kunmap(dentry_page);
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs has occurred.
+ */
+ f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0);
+ return de;
+}
+
+struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
+ struct f2fs_dentry_ptr *d)
+{
+ struct f2fs_dir_entry *de;
+ unsigned long bit_pos = 0;
+ f2fs_hash_t namehash = f2fs_dentry_hash(name);
+ int max_len = 0;
+
+ if (max_slots)
+ *max_slots = 0;
+ while (bit_pos < d->max) {
+ if (!test_bit_le(bit_pos, d->bitmap)) {
+ if (bit_pos == 0)
+ max_len = 1;
+ else if (!test_bit_le(bit_pos - 1, d->bitmap))
+ max_len++;
+ bit_pos++;
+ continue;
+ }
+ de = &d->dentry[bit_pos];
+ if (early_match_name(name->len, namehash, de) &&
+ !memcmp(d->filename[bit_pos], name->name, name->len))
+ goto found;
+
+ if (max_slots && *max_slots >= 0 && max_len > *max_slots) {
+ *max_slots = max_len;
+ max_len = 0;
+ }
+
+ /* remain bug on condition */
+ if (unlikely(!de->name_len))
+ d->max = -1;
+
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ }
+
+ de = NULL;
+found:
+ if (max_slots && max_len > *max_slots)
+ *max_slots = max_len;
+ return de;
+}
+
+static struct f2fs_dir_entry *find_in_level(struct inode *dir,
+ unsigned int level, struct qstr *name,
+ f2fs_hash_t namehash, struct page **res_page)
+{
+ int s = GET_DENTRY_SLOTS(name->len);
+ unsigned int nbucket, nblock;
+ unsigned int bidx, end_block;
+ struct page *dentry_page;
+ struct f2fs_dir_entry *de = NULL;
+ bool room = false;
+ int max_slots;
+
+ f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
+
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
+ nblock = bucket_blocks(level);
+
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ le32_to_cpu(namehash) % nbucket);
+ end_block = bidx + nblock;
+
+ for (; bidx < end_block; bidx++) {
+ /* no need to allocate new dentry pages to all the indices */
+ dentry_page = find_data_page(dir, bidx, true);
+ if (IS_ERR(dentry_page)) {
+ room = true;
+ continue;
+ }
+
+ de = find_in_block(dentry_page, name, &max_slots, res_page);
+ if (de)
+ break;
+
+ if (max_slots >= s)
+ room = true;
+ f2fs_put_page(dentry_page, 0);
+ }
+
+ if (!de && room && F2FS_I(dir)->chash != namehash) {
+ F2FS_I(dir)->chash = namehash;
+ F2FS_I(dir)->clevel = level;
+ }
+
+ return de;
+}
+
+/*
+ * Find an entry in the specified directory with the wanted name.
+ * It returns the page where the entry was found (as a parameter - res_page),
+ * and the entry itself. Page is returned mapped and unlocked.
+ * Entry is guaranteed to be valid.
+ */
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
+ struct qstr *child, struct page **res_page)
+{
+ unsigned long npages = dir_blocks(dir);
+ struct f2fs_dir_entry *de = NULL;
+ f2fs_hash_t name_hash;
+ unsigned int max_depth;
+ unsigned int level;
+
+ if (f2fs_has_inline_dentry(dir))
+ return find_in_inline_dir(dir, child, res_page);
+
+ if (npages == 0)
+ return NULL;
+
+ *res_page = NULL;
+
+ name_hash = f2fs_dentry_hash(child);
+ max_depth = F2FS_I(dir)->i_current_depth;
+
+ for (level = 0; level < max_depth; level++) {
+ de = find_in_level(dir, level, child, name_hash, res_page);
+ if (de)
+ break;
+ }
+ if (!de && F2FS_I(dir)->chash != name_hash) {
+ F2FS_I(dir)->chash = name_hash;
+ F2FS_I(dir)->clevel = level - 1;
+ }
+ return de;
+}
+
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
+{
+ struct page *page;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_block *dentry_blk;
+
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_parent_inline_dir(dir, p);
+
+ page = get_lock_data_page(dir, 0);
+ if (IS_ERR(page))
+ return NULL;
+
+ dentry_blk = kmap(page);
+ de = &dentry_blk->dentry[1];
+ *p = page;
+ unlock_page(page);
+ return de;
+}
+
+ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
+{
+ ino_t res = 0;
+ struct f2fs_dir_entry *de;
+ struct page *page;
+
+ de = f2fs_find_entry(dir, qstr, &page);
+ if (de) {
+ res = le32_to_cpu(de->ino);
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+ }
+
+ return res;
+}
+
+void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
+ struct page *page, struct inode *inode)
+{
+ enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
+ lock_page(page);
+ f2fs_wait_on_page_writeback(page, type);
+ de->ino = cpu_to_le32(inode->i_ino);
+ set_de_type(de, inode);
+ f2fs_dentry_kunmap(dir, page);
+ set_page_dirty(page);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(dir);
+
+ f2fs_put_page(page, 1);
+}
+
+static void init_dent_inode(const struct qstr *name, struct page *ipage)
+{
+ struct f2fs_inode *ri;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ /* copy name info. to this inode page */
+ ri = F2FS_INODE(ipage);
+ ri->i_namelen = cpu_to_le32(name->len);
+ memcpy(ri->i_name, name->name, name->len);
+ set_page_dirty(ipage);
+}
+
+int update_dent_inode(struct inode *inode, const struct qstr *name)
+{
+ struct page *page;
+
+ page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ init_dent_inode(name, page);
+ f2fs_put_page(page, 1);
+
+ return 0;
+}
+
+void do_make_empty_dir(struct inode *inode, struct inode *parent,
+ struct f2fs_dentry_ptr *d)
+{
+ struct f2fs_dir_entry *de;
+
+ de = &d->dentry[0];
+ de->name_len = cpu_to_le16(1);
+ de->hash_code = 0;
+ de->ino = cpu_to_le32(inode->i_ino);
+ memcpy(d->filename[0], ".", 1);
+ set_de_type(de, inode);
+
+ de = &d->dentry[1];
+ de->hash_code = 0;
+ de->name_len = cpu_to_le16(2);
+ de->ino = cpu_to_le32(parent->i_ino);
+ memcpy(d->filename[1], "..", 2);
+ set_de_type(de, inode);
+
+ test_and_set_bit_le(0, (void *)d->bitmap);
+ test_and_set_bit_le(1, (void *)d->bitmap);
+}
+
+static int make_empty_dir(struct inode *inode,
+ struct inode *parent, struct page *page)
+{
+ struct page *dentry_page;
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dentry_ptr d;
+
+ if (f2fs_has_inline_dentry(inode))
+ return make_empty_inline_dir(inode, parent, page);
+
+ dentry_page = get_new_data_page(inode, page, 0, true);
+ if (IS_ERR(dentry_page))
+ return PTR_ERR(dentry_page);
+
+ dentry_blk = kmap_atomic(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ do_make_empty_dir(inode, parent, &d);
+
+ kunmap_atomic(dentry_blk);
+
+ set_page_dirty(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ return 0;
+}
+
+struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+ const struct qstr *name, struct page *dpage)
+{
+ struct page *page;
+ int err;
+
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
+ page = new_inode_page(inode);
+ if (IS_ERR(page))
+ return page;
+
+ if (S_ISDIR(inode->i_mode)) {
+ err = make_empty_dir(inode, dir, page);
+ if (err)
+ goto error;
+ }
+
+ err = f2fs_init_acl(inode, dir, page, dpage);
+ if (err)
+ goto put_error;
+
+ err = f2fs_init_security(inode, dir, name, page);
+ if (err)
+ goto put_error;
+ } else {
+ page = get_node_page(F2FS_I_SB(dir), inode->i_ino);
+ if (IS_ERR(page))
+ return page;
+
+ set_cold_node(inode, page);
+ }
+
+ if (name)
+ init_dent_inode(name, page);
+
+ /*
+ * This file should be checkpointed during fsync.
+ * We lost i_pino from now on.
+ */
+ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
+ file_lost_pino(inode);
+ /*
+ * If link the tmpfile to alias through linkat path,
+ * we should remove this inode from orphan list.
+ */
+ if (inode->i_nlink == 0)
+ remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino);
+ inc_nlink(inode);
+ }
+ return page;
+
+put_error:
+ f2fs_put_page(page, 1);
+error:
+ /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
+ truncate_inode_pages(&inode->i_data, 0);
+ truncate_blocks(inode, 0, false);
+ remove_dirty_dir_inode(inode);
+ remove_inode_page(inode);
+ return ERR_PTR(err);
+}
+
+void update_parent_metadata(struct inode *dir, struct inode *inode,
+ unsigned int current_depth)
+{
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
+ if (S_ISDIR(inode->i_mode)) {
+ inc_nlink(dir);
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+ clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+ }
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(dir);
+
+ if (F2FS_I(dir)->i_current_depth != current_depth) {
+ F2FS_I(dir)->i_current_depth = current_depth;
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+
+ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+}
+
+int room_for_filename(const void *bitmap, int slots, int max_slots)
+{
+ int bit_start = 0;
+ int zero_start, zero_end;
+next:
+ zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start);
+ if (zero_start >= max_slots)
+ return max_slots;
+
+ zero_end = find_next_bit_le(bitmap, max_slots, zero_start);
+ if (zero_end - zero_start >= slots)
+ return zero_start;
+
+ bit_start = zero_end + 1;
+
+ if (zero_end + 1 >= max_slots)
+ return max_slots;
+ goto next;
+}
+
+/*
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ */
+int __f2fs_add_link(struct inode *dir, const struct qstr *name,
+ struct inode *inode)
+{
+ unsigned int bit_pos;
+ unsigned int level;
+ unsigned int current_depth;
+ unsigned long bidx, block;
+ f2fs_hash_t dentry_hash;
+ struct f2fs_dir_entry *de;
+ unsigned int nbucket, nblock;
+ size_t namelen = name->len;
+ struct page *dentry_page = NULL;
+ struct f2fs_dentry_block *dentry_blk = NULL;
+ int slots = GET_DENTRY_SLOTS(namelen);
+ struct page *page;
+ int err = 0;
+ int i;
+
+ if (f2fs_has_inline_dentry(dir)) {
+ err = f2fs_add_inline_entry(dir, name, inode);
+ if (!err || err != -EAGAIN)
+ return err;
+ else
+ err = 0;
+ }
+
+ dentry_hash = f2fs_dentry_hash(name);
+ level = 0;
+ current_depth = F2FS_I(dir)->i_current_depth;
+ if (F2FS_I(dir)->chash == dentry_hash) {
+ level = F2FS_I(dir)->clevel;
+ F2FS_I(dir)->chash = 0;
+ }
+
+start:
+ if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
+ return -ENOSPC;
+
+ /* Increase the depth, if required */
+ if (level == current_depth)
+ ++current_depth;
+
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
+ nblock = bucket_blocks(level);
+
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ (le32_to_cpu(dentry_hash) % nbucket));
+
+ for (block = bidx; block <= (bidx + nblock - 1); block++) {
+ dentry_page = get_new_data_page(dir, NULL, block, true);
+ if (IS_ERR(dentry_page))
+ return PTR_ERR(dentry_page);
+
+ dentry_blk = kmap(dentry_page);
+ bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ slots, NR_DENTRY_IN_BLOCK);
+ if (bit_pos < NR_DENTRY_IN_BLOCK)
+ goto add_dentry;
+
+ kunmap(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ }
+
+ /* Move to next level to find the empty slot for new dentry */
+ ++level;
+ goto start;
+add_dentry:
+ f2fs_wait_on_page_writeback(dentry_page, DATA);
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+ de = &dentry_blk->dentry[bit_pos];
+ de->hash_code = dentry_hash;
+ de->name_len = cpu_to_le16(namelen);
+ memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
+ de->ino = cpu_to_le32(inode->i_ino);
+ set_de_type(de, inode);
+ for (i = 0; i < slots; i++)
+ test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ set_page_dirty(dentry_page);
+
+ /* we don't need to mark_inode_dirty now */
+ F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ update_parent_metadata(dir, inode, current_depth);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode_page(dir);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+ kunmap(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ return err;
+}
+
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+{
+ struct page *page;
+ int err = 0;
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, NULL, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+ /* we don't need to mark_inode_dirty now */
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+ return err;
+}
+
+void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+ down_write(&F2FS_I(inode)->i_sem);
+
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(dir);
+ if (page)
+ update_inode(dir, page);
+ else
+ update_inode_page(dir);
+ }
+ inode->i_ctime = CURRENT_TIME;
+
+ drop_nlink(inode);
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(inode);
+ i_size_write(inode, 0);
+ }
+ up_write(&F2FS_I(inode)->i_sem);
+ update_inode_page(inode);
+
+ if (inode->i_nlink == 0)
+ add_orphan_inode(sbi, inode->i_ino);
+ else
+ release_orphan_inode(sbi);
+}
+
+/*
+ * It only removes the dentry from the dentry page, corresponding name
+ * entry in name page does not need to be touched during deletion.
+ */
+void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode)
+{
+ struct f2fs_dentry_block *dentry_blk;
+ unsigned int bit_pos;
+ int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+ int i;
+
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_delete_inline_entry(dentry, page, dir, inode);
+
+ lock_page(page);
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ dentry_blk = page_address(page);
+ bit_pos = dentry - dentry_blk->dentry;
+ for (i = 0; i < slots; i++)
+ test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+
+ /* Let's check and deallocate this dentry page */
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+ NR_DENTRY_IN_BLOCK,
+ 0);
+ kunmap(page); /* kunmap - pair of f2fs_find_entry */
+ set_page_dirty(page);
+
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+ if (inode)
+ f2fs_drop_nlink(dir, inode, NULL);
+
+ if (bit_pos == NR_DENTRY_IN_BLOCK) {
+ truncate_hole(dir, page->index, page->index + 1);
+ clear_page_dirty_for_io(page);
+ ClearPageUptodate(page);
+ inode_dec_dirty_pages(dir);
+ }
+ f2fs_put_page(page, 1);
+}
+
+bool f2fs_empty_dir(struct inode *dir)
+{
+ unsigned long bidx;
+ struct page *dentry_page;
+ unsigned int bit_pos;
+ struct f2fs_dentry_block *dentry_blk;
+ unsigned long nblock = dir_blocks(dir);
+
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_empty_inline_dir(dir);
+
+ for (bidx = 0; bidx < nblock; bidx++) {
+ dentry_page = get_lock_data_page(dir, bidx);
+ if (IS_ERR(dentry_page)) {
+ if (PTR_ERR(dentry_page) == -ENOENT)
+ continue;
+ else
+ return false;
+ }
+
+ dentry_blk = kmap_atomic(dentry_page);
+ if (bidx == 0)
+ bit_pos = 2;
+ else
+ bit_pos = 0;
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+ NR_DENTRY_IN_BLOCK,
+ bit_pos);
+ kunmap_atomic(dentry_blk);
+
+ f2fs_put_page(dentry_page, 1);
+
+ if (bit_pos < NR_DENTRY_IN_BLOCK)
+ return false;
+ }
+ return true;
+}
+
+bool f2fs_fill_dentries(struct file *file, void *dirent, filldir_t filldir,
+ struct f2fs_dentry_ptr *d, unsigned int n, unsigned int bit_pos)
+{
+ unsigned int start_bit_pos = bit_pos;
+ unsigned char d_type;
+ struct f2fs_dir_entry *de = NULL;
+ unsigned char *types = f2fs_filetype_table;
+ int over;
+
+ while (bit_pos < d->max) {
+ d_type = DT_UNKNOWN;
+ bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
+ if (bit_pos >= d->max)
+ break;
+
+ de = &d->dentry[bit_pos];
+ if (types && de->file_type < F2FS_FT_MAX)
+ d_type = types[de->file_type];
+
+ over = filldir(dirent, d->filename[bit_pos],
+ le16_to_cpu(de->name_len),
+ (n * d->max) + bit_pos,
+ le32_to_cpu(de->ino), d_type);
+ if (over) {
+ file->f_pos += bit_pos - start_bit_pos;
+ return true;
+ }
+
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ }
+ return false;
+}
+
+static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+ unsigned long pos = file->f_pos;
+ unsigned int bit_pos = 0;
+ struct inode *inode = file_inode(file);
+ unsigned long npages = dir_blocks(inode);
+ struct f2fs_dentry_block *dentry_blk = NULL;
+ struct page *dentry_page = NULL;
+ struct file_ra_state *ra = &file->f_ra;
+ struct f2fs_dentry_ptr d;
+ unsigned int n = 0;
+
+ if (f2fs_has_inline_dentry(inode))
+ return f2fs_read_inline_dir(file, dirent, filldir);
+
+ bit_pos = (pos % NR_DENTRY_IN_BLOCK);
+ n = (pos / NR_DENTRY_IN_BLOCK);
+
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
+
+ for (; n < npages; n++) {
+ dentry_page = get_lock_data_page(inode, n);
+ if (IS_ERR(dentry_page))
+ continue;
+
+ dentry_blk = kmap(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+
+ if (f2fs_fill_dentries(file, dirent, filldir, &d, n, bit_pos))
+ goto stop;
+
+ bit_pos = 0;
+ file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK;
+ kunmap(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ dentry_page = NULL;
+ }
+stop:
+ if (dentry_page && !IS_ERR(dentry_page)) {
+ kunmap(dentry_page);
+ f2fs_put_page(dentry_page, 1);
+ }
+
+ return 0;
+}
+
+const struct file_operations f2fs_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .readdir = f2fs_readdir,
+ .fsync = f2fs_sync_file,
+ .unlocked_ioctl = f2fs_ioctl,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
new file mode 100644
index 0000000..686aad1
--- /dev/null
+++ b/fs/f2fs/f2fs.h
@@ -0,0 +1,1711 @@
+/*
+ * fs/f2fs/f2fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_H
+#define _LINUX_F2FS_H
+
+#include <linux/types.h>
+#include <linux/page-flags.h>
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/magic.h>
+#include <linux/kobject.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_F2FS_CHECK_FS
+#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
+#define f2fs_down_write(x, y) down_write(x)
+#else
+#define f2fs_bug_on(sbi, condition) \
+ do { \
+ if (unlikely(condition)) { \
+ WARN_ON(1); \
+ set_sbi_flag(sbi, SBI_NEED_FSCK); \
+ } \
+ } while (0)
+#define f2fs_down_write(x, y) down_write(x)
+#endif
+
+/*
+ * For mount options
+ */
+#define F2FS_SUPER_MAGIC 0xF2F52010 /* F2FS Magic Number */
+#define F2FS_MOUNT_BG_GC 0x00000001
+#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
+#define F2FS_MOUNT_DISCARD 0x00000004
+#define F2FS_MOUNT_NOHEAP 0x00000008
+#define F2FS_MOUNT_XATTR_USER 0x00000010
+#define F2FS_MOUNT_POSIX_ACL 0x00000020
+#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
+#define F2FS_MOUNT_INLINE_XATTR 0x00000080
+#define F2FS_MOUNT_INLINE_DATA 0x00000100
+#define F2FS_MOUNT_INLINE_DENTRY 0x00000200
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
+#define F2FS_MOUNT_NOBARRIER 0x00000800
+#define F2FS_MOUNT_FASTBOOT 0x00001000
+
+#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
+#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
+#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option)
+
+#define ver_after(a, b) (typecheck(unsigned long long, a) && \
+ typecheck(unsigned long long, b) && \
+ ((long long)((a) - (b)) > 0))
+
+typedef u32 block_t; /*
+ * should not change u32, since it is the on-disk block
+ * address format, __le32.
+ */
+typedef u32 nid_t;
+
+struct f2fs_mount_info {
+ unsigned int opt;
+};
+
+#define CRCPOLY_LE 0xedb88320
+
+static inline __u32 f2fs_crc32(void *buf, size_t len)
+{
+ unsigned char *p = (unsigned char *)buf;
+ __u32 crc = F2FS_SUPER_MAGIC;
+ int i;
+
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+ }
+ return crc;
+}
+
+static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size)
+{
+ return f2fs_crc32(buf, buf_size) == blk_crc;
+}
+
+/*
+ * For checkpoint manager
+ */
+enum {
+ NAT_BITMAP,
+ SIT_BITMAP
+};
+
+enum {
+ CP_UMOUNT,
+ CP_FASTBOOT,
+ CP_SYNC,
+ CP_DISCARD,
+};
+
+#define DEF_BATCHED_TRIM_SECTIONS 32
+#define BATCHED_TRIM_SEGMENTS(sbi) \
+ (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
+
+struct cp_control {
+ int reason;
+ __u64 trim_start;
+ __u64 trim_end;
+ __u64 trim_minlen;
+ __u64 trimmed;
+};
+
+/*
+ * For CP/NAT/SIT/SSA readahead
+ */
+enum {
+ META_CP,
+ META_NAT,
+ META_SIT,
+ META_SSA,
+ META_POR,
+};
+
+/* for the list of ino */
+enum {
+ ORPHAN_INO, /* for orphan ino list */
+ APPEND_INO, /* for append ino list */
+ UPDATE_INO, /* for update ino list */
+ MAX_INO_ENTRY, /* max. list */
+};
+
+struct ino_entry {
+ struct list_head list; /* list head */
+ nid_t ino; /* inode number */
+};
+
+/*
+ * for the list of directory inodes or gc inodes.
+ * NOTE: there are two slab users for this structure, if we add/modify/delete
+ * fields in structure for one of slab users, it may affect fields or size of
+ * other one, in this condition, it's better to split both of slab and related
+ * data structure.
+ */
+struct inode_entry {
+ struct list_head list; /* list head */
+ struct inode *inode; /* vfs inode pointer */
+};
+
+/* for the list of blockaddresses to be discarded */
+struct discard_entry {
+ struct list_head list; /* list head */
+ block_t blkaddr; /* block address to be discarded */
+ int len; /* # of consecutive blocks of the discard */
+};
+
+/* for the list of fsync inodes, used only during recovery */
+struct fsync_inode_entry {
+ struct list_head list; /* list head */
+ struct inode *inode; /* vfs inode pointer */
+ block_t blkaddr; /* block address locating the last fsync */
+ block_t last_dentry; /* block address locating the last dentry */
+ block_t last_inode; /* block address locating the last inode */
+};
+
+#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats))
+#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits))
+
+#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne)
+#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid)
+#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
+#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)
+
+#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
+#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
+
+static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
+{
+ int before = nats_in_cursum(rs);
+ rs->n_nats = cpu_to_le16(before + i);
+ return before;
+}
+
+static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
+{
+ int before = sits_in_cursum(rs);
+ rs->n_sits = cpu_to_le16(before + i);
+ return before;
+}
+
+static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
+ int type)
+{
+ if (type == NAT_JOURNAL)
+ return size <= MAX_NAT_JENTRIES(sum);
+ return size <= MAX_SIT_JENTRIES(sum);
+}
+
+/*
+ * ioctl commands
+ */
+#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION
+
+#define F2FS_IOCTL_MAGIC 0xf5
+#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
+#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
+#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
+#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
+#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
+
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#endif
+
+/*
+ * For INODE and NODE manager
+ */
+/* for directory operations */
+struct f2fs_dentry_ptr {
+ const void *bitmap;
+ struct f2fs_dir_entry *dentry;
+ __u8 (*filename)[F2FS_SLOT_LEN];
+ int max;
+};
+
+static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d,
+ void *src, int type)
+{
+ if (type == 1) {
+ struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
+ d->max = NR_DENTRY_IN_BLOCK;
+ d->bitmap = &t->dentry_bitmap;
+ d->dentry = t->dentry;
+ d->filename = t->filename;
+ } else {
+ struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
+ d->max = NR_INLINE_DENTRY;
+ d->bitmap = &t->dentry_bitmap;
+ d->dentry = t->dentry;
+ d->filename = t->filename;
+ }
+}
+
+/*
+ * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
+ * as its node offset to distinguish from index node blocks.
+ * But some bits are used to mark the node block.
+ */
+#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \
+ >> OFFSET_BIT_SHIFT)
+enum {
+ ALLOC_NODE, /* allocate a new node page if needed */
+ LOOKUP_NODE, /* look up a node without readahead */
+ LOOKUP_NODE_RA, /*
+ * look up a node with readahead called
+ * by get_data_block.
+ */
+};
+
+#define F2FS_LINK_MAX 32000 /* maximum link count per file */
+
+#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
+
+/* for in-memory extent cache entry */
+#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
+
+struct extent_info {
+ rwlock_t ext_lock; /* rwlock for consistency */
+ unsigned int fofs; /* start offset in a file */
+ u32 blk_addr; /* start block address of the extent */
+ unsigned int len; /* length of the extent */
+};
+
+/*
+ * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
+ */
+#define FADVISE_COLD_BIT 0x01
+#define FADVISE_LOST_PINO_BIT 0x02
+
+#define DEF_DIR_LEVEL 0
+
+struct f2fs_inode_info {
+ struct inode vfs_inode; /* serve a vfs inode */
+ unsigned long i_flags; /* keep an inode flags for ioctl */
+ unsigned char i_advise; /* use to give file attribute hints */
+ unsigned char i_dir_level; /* use for dentry level for large dir */
+ unsigned int i_current_depth; /* use only in directory structure */
+ unsigned int i_pino; /* parent inode number */
+ umode_t i_acl_mode; /* keep file acl mode temporarily */
+
+ /* Use below internally in f2fs*/
+ unsigned long flags; /* use to pass per-file flags */
+ struct rw_semaphore i_sem; /* protect fi info */
+ atomic_t dirty_pages; /* # of dirty pages */
+ f2fs_hash_t chash; /* hash value of given file name */
+ unsigned int clevel; /* maximum level of given file name */
+ nid_t i_xattr_nid; /* node id that contains xattrs */
+ unsigned long long xattr_ver; /* cp version of xattr modification */
+ struct extent_info ext; /* in-memory extent cache entry */
+ struct inode_entry *dirty_dir; /* the pointer of dirty dir */
+
+ struct radix_tree_root inmem_root; /* radix tree for inmem pages */
+ struct list_head inmem_pages; /* inmemory pages managed by f2fs */
+ struct mutex inmem_lock; /* lock for inmemory pages */
+};
+
+static inline void get_extent_info(struct extent_info *ext,
+ struct f2fs_extent i_ext)
+{
+ write_lock(&ext->ext_lock);
+ ext->fofs = le32_to_cpu(i_ext.fofs);
+ ext->blk_addr = le32_to_cpu(i_ext.blk_addr);
+ ext->len = le32_to_cpu(i_ext.len);
+ write_unlock(&ext->ext_lock);
+}
+
+static inline void set_raw_extent(struct extent_info *ext,
+ struct f2fs_extent *i_ext)
+{
+ read_lock(&ext->ext_lock);
+ i_ext->fofs = cpu_to_le32(ext->fofs);
+ i_ext->blk_addr = cpu_to_le32(ext->blk_addr);
+ i_ext->len = cpu_to_le32(ext->len);
+ read_unlock(&ext->ext_lock);
+}
+
+struct f2fs_nm_info {
+ block_t nat_blkaddr; /* base disk address of NAT */
+ nid_t max_nid; /* maximum possible node ids */
+ nid_t available_nids; /* maximum available node ids */
+ nid_t next_scan_nid; /* the next nid to be scanned */
+ unsigned int ram_thresh; /* control the memory footprint */
+
+ /* NAT cache management */
+ struct radix_tree_root nat_root;/* root of the nat entry cache */
+ struct radix_tree_root nat_set_root;/* root of the nat set cache */
+ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
+ struct list_head nat_entries; /* cached nat entry list (clean) */
+ unsigned int nat_cnt; /* the # of cached nat entries */
+ unsigned int dirty_nat_cnt; /* total num of nat entries in set */
+
+ /* free node ids management */
+ struct radix_tree_root free_nid_root;/* root of the free_nid cache */
+ struct list_head free_nid_list; /* a list for free nids */
+ spinlock_t free_nid_list_lock; /* protect free nid list */
+ unsigned int fcnt; /* the number of free node id */
+ struct mutex build_lock; /* lock for build free nids */
+
+ /* for checkpoint */
+ char *nat_bitmap; /* NAT bitmap pointer */
+ int bitmap_size; /* bitmap size */
+};
+
+/*
+ * this structure is used as one of function parameters.
+ * all the information are dedicated to a given direct node block determined
+ * by the data offset in a file.
+ */
+struct dnode_of_data {
+ struct inode *inode; /* vfs inode pointer */
+ struct page *inode_page; /* its inode page, NULL is possible */
+ struct page *node_page; /* cached direct node page */
+ nid_t nid; /* node id of the direct node block */
+ unsigned int ofs_in_node; /* data offset in the node page */
+ bool inode_page_locked; /* inode page is locked or not */
+ block_t data_blkaddr; /* block address of the node block */
+};
+
+static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
+ struct page *ipage, struct page *npage, nid_t nid)
+{
+ memset(dn, 0, sizeof(*dn));
+ dn->inode = inode;
+ dn->inode_page = ipage;
+ dn->node_page = npage;
+ dn->nid = nid;
+}
+
+/*
+ * For SIT manager
+ *
+ * By default, there are 6 active log areas across the whole main area.
+ * When considering hot and cold data separation to reduce cleaning overhead,
+ * we split 3 for data logs and 3 for node logs as hot, warm, and cold types,
+ * respectively.
+ * In the current design, you should not change the numbers intentionally.
+ * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6
+ * logs individually according to the underlying devices. (default: 6)
+ * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for
+ * data and 8 for node logs.
+ */
+#define NR_CURSEG_DATA_TYPE (3)
+#define NR_CURSEG_NODE_TYPE (3)
+#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+
+enum {
+ CURSEG_HOT_DATA = 0, /* directory entry blocks */
+ CURSEG_WARM_DATA, /* data blocks */
+ CURSEG_COLD_DATA, /* multimedia or GCed data blocks */
+ CURSEG_HOT_NODE, /* direct node blocks of directory files */
+ CURSEG_WARM_NODE, /* direct node blocks of normal files */
+ CURSEG_COLD_NODE, /* indirect node blocks */
+ NO_CHECK_TYPE,
+ CURSEG_DIRECT_IO, /* to use for the direct IO path */
+};
+
+struct flush_cmd {
+ struct completion wait;
+ struct llist_node llnode;
+ int ret;
+};
+
+struct flush_cmd_control {
+ struct task_struct *f2fs_issue_flush; /* flush thread */
+ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
+ struct llist_head issue_list; /* list for command issue */
+ struct llist_node *dispatch_list; /* list for command dispatch */
+};
+
+struct f2fs_sm_info {
+ struct sit_info *sit_info; /* whole segment information */
+ struct free_segmap_info *free_info; /* free segment information */
+ struct dirty_seglist_info *dirty_info; /* dirty segment information */
+ struct curseg_info *curseg_array; /* active segment information */
+
+ block_t seg0_blkaddr; /* block address of 0'th segment */
+ block_t main_blkaddr; /* start block address of main area */
+ block_t ssa_blkaddr; /* start block address of SSA area */
+
+ unsigned int segment_count; /* total # of segments */
+ unsigned int main_segments; /* # of segments in main area */
+ unsigned int reserved_segments; /* # of reserved segments */
+ unsigned int ovp_segments; /* # of overprovision segments */
+
+ /* a threshold to reclaim prefree segments */
+ unsigned int rec_prefree_segments;
+
+ /* for small discard management */
+ struct list_head discard_list; /* 4KB discard list */
+ int nr_discards; /* # of discards in the list */
+ int max_discards; /* max. discards to be issued */
+
+ /* for batched trimming */
+ unsigned int trim_sections; /* # of sections to trim */
+
+ struct list_head sit_entry_set; /* sit entry set list */
+
+ unsigned int ipu_policy; /* in-place-update policy */
+ unsigned int min_ipu_util; /* in-place-update threshold */
+ unsigned int min_fsync_blocks; /* threshold for fsync */
+
+ /* for flush command control */
+ struct flush_cmd_control *cmd_control_info;
+
+};
+
+/*
+ * For superblock
+ */
+/*
+ * COUNT_TYPE for monitoring
+ *
+ * f2fs monitors the number of several block types such as on-writeback,
+ * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
+ */
+enum count_type {
+ F2FS_WRITEBACK,
+ F2FS_DIRTY_DENTS,
+ F2FS_DIRTY_NODES,
+ F2FS_DIRTY_META,
+ F2FS_INMEM_PAGES,
+ NR_COUNT_TYPE,
+};
+
+/*
+ * The below are the page types of bios used in submit_bio().
+ * The available types are:
+ * DATA User data pages. It operates as async mode.
+ * NODE Node pages. It operates as async mode.
+ * META FS metadata pages such as SIT, NAT, CP.
+ * NR_PAGE_TYPE The number of page types.
+ * META_FLUSH Make sure the previous pages are written
+ * with waiting the bio's completion
+ * ... Only can be used with META.
+ */
+#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
+enum page_type {
+ DATA,
+ NODE,
+ META,
+ NR_PAGE_TYPE,
+ META_FLUSH,
+};
+
+struct f2fs_io_info {
+ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
+ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
+ block_t blk_addr; /* block address to be written */
+};
+
+#define is_read_io(rw) (((rw) & 1) == READ)
+struct f2fs_bio_info {
+ struct f2fs_sb_info *sbi; /* f2fs superblock */
+ struct bio *bio; /* bios to merge */
+ sector_t last_block_in_bio; /* last block number */
+ struct f2fs_io_info fio; /* store buffered io info. */
+ struct rw_semaphore io_rwsem; /* blocking op for bio */
+};
+
+/* for inner inode cache management */
+struct inode_management {
+ struct radix_tree_root ino_root; /* ino entry array */
+ spinlock_t ino_lock; /* for ino entry lock */
+ struct list_head ino_list; /* inode list head */
+ unsigned long ino_num; /* number of entries */
+};
+
+/* For s_flag in struct f2fs_sb_info */
+enum {
+ SBI_IS_DIRTY, /* dirty flag for checkpoint */
+ SBI_IS_CLOSE, /* specify unmounting */
+ SBI_NEED_FSCK, /* need fsck.f2fs to fix */
+ SBI_POR_DOING, /* recovery is doing or not */
+};
+
+struct f2fs_sb_info {
+ struct super_block *sb; /* pointer to VFS super block */
+ struct proc_dir_entry *s_proc; /* proc entry */
+ struct buffer_head *raw_super_buf; /* buffer head of raw sb */
+ struct f2fs_super_block *raw_super; /* raw super block pointer */
+ int s_flag; /* flags for sbi */
+
+ /* for node-related operations */
+ struct f2fs_nm_info *nm_info; /* node manager */
+ struct inode *node_inode; /* cache node blocks */
+
+ /* for segment-related operations */
+ struct f2fs_sm_info *sm_info; /* segment manager */
+
+ /* for bio operations */
+ struct f2fs_bio_info read_io; /* for read bios */
+ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
+
+ /* for checkpoint */
+ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
+ struct inode *meta_inode; /* cache meta blocks */
+ struct mutex cp_mutex; /* checkpoint procedure lock */
+ struct rw_semaphore cp_rwsem; /* blocking FS operations */
+ struct rw_semaphore node_write; /* locking node writes */
+ struct mutex writepages; /* mutex for writepages() */
+ wait_queue_head_t cp_wait;
+
+ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+
+ /* for orphan inode, use 0'th array */
+ unsigned int max_orphans; /* max orphan inodes */
+
+ /* for directory inode management */
+ struct list_head dir_inode_list; /* dir inode list */
+ spinlock_t dir_inode_lock; /* for dir inode list lock */
+
+ /* basic filesystem units */
+ unsigned int log_sectors_per_block; /* log2 sectors per block */
+ unsigned int log_blocksize; /* log2 block size */
+ unsigned int blocksize; /* block size */
+ unsigned int root_ino_num; /* root inode number*/
+ unsigned int node_ino_num; /* node inode number*/
+ unsigned int meta_ino_num; /* meta inode number*/
+ unsigned int log_blocks_per_seg; /* log2 blocks per segment */
+ unsigned int blocks_per_seg; /* blocks per segment */
+ unsigned int segs_per_sec; /* segments per section */
+ unsigned int secs_per_zone; /* sections per zone */
+ unsigned int total_sections; /* total section count */
+ unsigned int total_node_count; /* total node block count */
+ unsigned int total_valid_node_count; /* valid node block count */
+ unsigned int total_valid_inode_count; /* valid inode count */
+ int active_logs; /* # of active logs */
+ int dir_level; /* directory level */
+
+ block_t user_block_count; /* # of user blocks */
+ block_t total_valid_block_count; /* # of valid blocks */
+ block_t alloc_valid_block_count; /* # of allocated blocks */
+ block_t last_valid_block_count; /* for recovery */
+ u32 s_next_generation; /* for NFS support */
+ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */
+
+ struct f2fs_mount_info mount_opt; /* mount options */
+
+ /* for cleaning operations */
+ struct mutex gc_mutex; /* mutex for GC */
+ struct f2fs_gc_kthread *gc_thread; /* GC thread */
+ unsigned int cur_victim_sec; /* current victim section num */
+
+ /* maximum # of trials to find a victim segment for SSR and GC */
+ unsigned int max_victim_search;
+
+ /*
+ * for stat information.
+ * one is for the LFS mode, and the other is for the SSR mode.
+ */
+#ifdef CONFIG_F2FS_STAT_FS
+ struct f2fs_stat_info *stat_info; /* FS status information */
+ unsigned int segment_count[2]; /* # of allocated segments */
+ unsigned int block_count[2]; /* # of allocated blocks */
+ atomic_t inplace_count; /* # of inplace update */
+ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
+ atomic_t inline_inode; /* # of inline_data inodes */
+ atomic_t inline_dir; /* # of inline_dentry inodes */
+ int bg_gc; /* background gc calls */
+ unsigned int n_dirty_dirs; /* # of dir inodes */
+#endif
+ unsigned int last_victim[2]; /* last victim segment # */
+ spinlock_t stat_lock; /* lock for stat operations */
+
+ /* For sysfs suppport */
+ struct kobject s_kobj;
+ struct completion s_kobj_unregister;
+};
+
+/*
+ * Inline functions
+ */
+static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
+{
+ return container_of(inode, struct f2fs_inode_info, vfs_inode);
+}
+
+static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode)
+{
+ return F2FS_SB(inode->i_sb);
+}
+
+static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
+{
+ return F2FS_I_SB(mapping->host);
+}
+
+static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
+{
+ return F2FS_M_SB(page->mapping);
+}
+
+static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_super_block *)(sbi->raw_super);
+}
+
+static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_checkpoint *)(sbi->ckpt);
+}
+
+static inline struct f2fs_node *F2FS_NODE(struct page *page)
+{
+ return (struct f2fs_node *)page_address(page);
+}
+
+static inline struct f2fs_inode *F2FS_INODE(struct page *page)
+{
+ return &((struct f2fs_node *)page_address(page))->i;
+}
+
+static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_nm_info *)(sbi->nm_info);
+}
+
+static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_sm_info *)(sbi->sm_info);
+}
+
+static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
+{
+ return (struct sit_info *)(SM_I(sbi)->sit_info);
+}
+
+static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
+{
+ return (struct free_segmap_info *)(SM_I(sbi)->free_info);
+}
+
+static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
+{
+ return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
+}
+
+static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi)
+{
+ return sbi->meta_inode->i_mapping;
+}
+
+static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
+{
+ return sbi->node_inode->i_mapping;
+}
+
+static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type)
+{
+ return sbi->s_flag & (0x01 << type);
+}
+
+static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
+{
+ sbi->s_flag |= (0x01 << type);
+}
+
+static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
+{
+ sbi->s_flag &= ~(0x01 << type);
+}
+
+static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
+{
+ return le64_to_cpu(cp->checkpoint_ver);
+}
+
+static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+ unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+ return ckpt_flags & f;
+}
+
+static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+ unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+ ckpt_flags |= f;
+ cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
+static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+ unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+ ckpt_flags &= (~f);
+ cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
+static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
+{
+ down_read(&sbi->cp_rwsem);
+}
+
+static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
+{
+ up_read(&sbi->cp_rwsem);
+}
+
+static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
+{
+ f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
+}
+
+static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
+{
+ up_write(&sbi->cp_rwsem);
+}
+
+static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
+{
+ int reason = CP_SYNC;
+
+ if (test_opt(sbi, FASTBOOT))
+ reason = CP_FASTBOOT;
+ if (is_sbi_flag_set(sbi, SBI_IS_CLOSE))
+ reason = CP_UMOUNT;
+ return reason;
+}
+
+static inline bool __remain_node_summaries(int reason)
+{
+ return (reason == CP_UMOUNT || reason == CP_FASTBOOT);
+}
+
+static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
+{
+ return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) ||
+ is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG));
+}
+
+/*
+ * Check whether the given nid is within node id range.
+ */
+static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ if (unlikely(nid < F2FS_ROOT_INO(sbi)))
+ return -EINVAL;
+ if (unlikely(nid >= NM_I(sbi)->max_nid))
+ return -EINVAL;
+ return 0;
+}
+
+#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1
+
+/*
+ * Check whether the inode has blocks or not
+ */
+static inline int F2FS_HAS_BLOCKS(struct inode *inode)
+{
+ if (F2FS_I(inode)->i_xattr_nid)
+ return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
+ else
+ return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
+}
+
+static inline bool f2fs_has_xattr_block(unsigned int ofs)
+{
+ return ofs == XATTR_NODE_OFFSET;
+}
+
+static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
+ struct inode *inode, blkcnt_t count)
+{
+ block_t valid_block_count;
+
+ spin_lock(&sbi->stat_lock);
+ valid_block_count =
+ sbi->total_valid_block_count + (block_t)count;
+ if (unlikely(valid_block_count > sbi->user_block_count)) {
+ spin_unlock(&sbi->stat_lock);
+ return false;
+ }
+ inode->i_blocks += count;
+ sbi->total_valid_block_count = valid_block_count;
+ sbi->alloc_valid_block_count += (block_t)count;
+ spin_unlock(&sbi->stat_lock);
+ return true;
+}
+
+static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
+ struct inode *inode,
+ blkcnt_t count)
+{
+ spin_lock(&sbi->stat_lock);
+ f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
+ f2fs_bug_on(sbi, inode->i_blocks < count);
+ inode->i_blocks -= count;
+ sbi->total_valid_block_count -= (block_t)count;
+ spin_unlock(&sbi->stat_lock);
+}
+
+static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
+{
+ atomic_inc(&sbi->nr_pages[count_type]);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+}
+
+static inline void inode_inc_dirty_pages(struct inode *inode)
+{
+ atomic_inc(&F2FS_I(inode)->dirty_pages);
+ if (S_ISDIR(inode->i_mode))
+ inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+}
+
+static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
+{
+ atomic_dec(&sbi->nr_pages[count_type]);
+}
+
+static inline void inode_dec_dirty_pages(struct inode *inode)
+{
+ if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
+ return;
+
+ atomic_dec(&F2FS_I(inode)->dirty_pages);
+
+ if (S_ISDIR(inode->i_mode))
+ dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
+}
+
+static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
+{
+ return atomic_read(&sbi->nr_pages[count_type]);
+}
+
+static inline int get_dirty_pages(struct inode *inode)
+{
+ return atomic_read(&F2FS_I(inode)->dirty_pages);
+}
+
+static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
+{
+ unsigned int pages_per_sec = sbi->segs_per_sec *
+ (1 << sbi->log_blocks_per_seg);
+ return ((get_pages(sbi, block_type) + pages_per_sec - 1)
+ >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
+}
+
+static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
+{
+ return sbi->total_valid_block_count;
+}
+
+static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+
+ /* return NAT or SIT bitmap */
+ if (flag == NAT_BITMAP)
+ return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+ else if (flag == SIT_BITMAP)
+ return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+
+ return 0;
+}
+
+static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ int offset;
+
+ if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+ if (flag == NAT_BITMAP)
+ return &ckpt->sit_nat_version_bitmap;
+ else
+ return (unsigned char *)ckpt + F2FS_BLKSIZE;
+ } else {
+ offset = (flag == NAT_BITMAP) ?
+ le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
+ return &ckpt->sit_nat_version_bitmap + offset;
+ }
+}
+
+static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
+{
+ block_t start_addr;
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned long long ckpt_version = cur_cp_version(ckpt);
+
+ start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+
+ /*
+ * odd numbered checkpoint should at cp segment 0
+ * and even segment must be at cp segment 1
+ */
+ if (!(ckpt_version & 1))
+ start_addr += sbi->blocks_per_seg;
+
+ return start_addr;
+}
+
+static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
+{
+ return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
+}
+
+static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
+ struct inode *inode)
+{
+ block_t valid_block_count;
+ unsigned int valid_node_count;
+
+ spin_lock(&sbi->stat_lock);
+
+ valid_block_count = sbi->total_valid_block_count + 1;
+ if (unlikely(valid_block_count > sbi->user_block_count)) {
+ spin_unlock(&sbi->stat_lock);
+ return false;
+ }
+
+ valid_node_count = sbi->total_valid_node_count + 1;
+ if (unlikely(valid_node_count > sbi->total_node_count)) {
+ spin_unlock(&sbi->stat_lock);
+ return false;
+ }
+
+ if (inode)
+ inode->i_blocks++;
+
+ sbi->alloc_valid_block_count++;
+ sbi->total_valid_node_count++;
+ sbi->total_valid_block_count++;
+ spin_unlock(&sbi->stat_lock);
+
+ return true;
+}
+
+static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
+ struct inode *inode)
+{
+ spin_lock(&sbi->stat_lock);
+
+ f2fs_bug_on(sbi, !sbi->total_valid_block_count);
+ f2fs_bug_on(sbi, !sbi->total_valid_node_count);
+ f2fs_bug_on(sbi, !inode->i_blocks);
+
+ inode->i_blocks--;
+ sbi->total_valid_node_count--;
+ sbi->total_valid_block_count--;
+
+ spin_unlock(&sbi->stat_lock);
+}
+
+static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
+{
+ return sbi->total_valid_node_count;
+}
+
+static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
+{
+ spin_lock(&sbi->stat_lock);
+ f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count);
+ sbi->total_valid_inode_count++;
+ spin_unlock(&sbi->stat_lock);
+}
+
+static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
+{
+ spin_lock(&sbi->stat_lock);
+ f2fs_bug_on(sbi, !sbi->total_valid_inode_count);
+ sbi->total_valid_inode_count--;
+ spin_unlock(&sbi->stat_lock);
+}
+
+static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
+{
+ return sbi->total_valid_inode_count;
+}
+
+static inline void f2fs_put_page(struct page *page, int unlock)
+{
+ if (!page)
+ return;
+
+ if (unlock) {
+ f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page));
+ unlock_page(page);
+ }
+ page_cache_release(page);
+}
+
+static inline void f2fs_put_dnode(struct dnode_of_data *dn)
+{
+ if (dn->node_page)
+ f2fs_put_page(dn->node_page, 1);
+ if (dn->inode_page && dn->node_page != dn->inode_page)
+ f2fs_put_page(dn->inode_page, 0);
+ dn->node_page = NULL;
+ dn->inode_page = NULL;
+}
+
+static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
+ size_t size)
+{
+ return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
+}
+
+static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
+{
+ void *entry;
+retry:
+ entry = kmem_cache_alloc(cachep, flags);
+ if (!entry) {
+ cond_resched();
+ goto retry;
+ }
+
+ return entry;
+}
+
+static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ while (radix_tree_insert(root, index, item))
+ cond_resched();
+}
+
+#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
+
+static inline bool IS_INODE(struct page *page)
+{
+ struct f2fs_node *p = F2FS_NODE(page);
+ return RAW_IS_INODE(p);
+}
+
+static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
+{
+ return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr;
+}
+
+static inline block_t datablock_addr(struct page *node_page,
+ unsigned int offset)
+{
+ struct f2fs_node *raw_node;
+ __le32 *addr_array;
+ raw_node = F2FS_NODE(node_page);
+ addr_array = blkaddr_in_node(raw_node);
+ return le32_to_cpu(addr_array[offset]);
+}
+
+static inline int f2fs_test_bit(unsigned int nr, char *addr)
+{
+ int mask;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ return mask & *addr;
+}
+
+static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
+{
+ int mask;
+ int ret;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ ret = mask & *addr;
+ *addr |= mask;
+ return ret;
+}
+
+static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr)
+{
+ int mask;
+ int ret;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ ret = mask & *addr;
+ *addr &= ~mask;
+ return ret;
+}
+
+static inline void f2fs_change_bit(unsigned int nr, char *addr)
+{
+ int mask;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ *addr ^= mask;
+}
+
+/* used for f2fs_inode_info->flags */
+enum {
+ FI_NEW_INODE, /* indicate newly allocated inode */
+ FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
+ FI_INC_LINK, /* need to increment i_nlink */
+ FI_ACL_MODE, /* indicate acl mode */
+ FI_NO_ALLOC, /* should not allocate any blocks */
+ FI_UPDATE_DIR, /* should update inode block for consistency */
+ FI_DELAY_IPUT, /* used for the recovery */
+ FI_NO_EXTENT, /* not to use the extent cache */
+ FI_INLINE_XATTR, /* used for inline xattr */
+ FI_INLINE_DATA, /* used for inline data*/
+ FI_INLINE_DENTRY, /* used for inline dentry */
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used for ipu per file */
+ FI_ATOMIC_FILE, /* indicate atomic file */
+ FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_DROP_CACHE, /* drop dirty page cache */
+ FI_DATA_EXIST, /* indicate data exists */
+};
+
+static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
+{
+ if (!test_bit(flag, &fi->flags))
+ set_bit(flag, &fi->flags);
+}
+
+static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
+{
+ return test_bit(flag, &fi->flags);
+}
+
+static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
+{
+ if (test_bit(flag, &fi->flags))
+ clear_bit(flag, &fi->flags);
+}
+
+static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
+{
+ fi->i_acl_mode = mode;
+ set_inode_flag(fi, FI_ACL_MODE);
+}
+
+static inline void get_inline_info(struct f2fs_inode_info *fi,
+ struct f2fs_inode *ri)
+{
+ if (ri->i_inline & F2FS_INLINE_XATTR)
+ set_inode_flag(fi, FI_INLINE_XATTR);
+ if (ri->i_inline & F2FS_INLINE_DATA)
+ set_inode_flag(fi, FI_INLINE_DATA);
+ if (ri->i_inline & F2FS_INLINE_DENTRY)
+ set_inode_flag(fi, FI_INLINE_DENTRY);
+ if (ri->i_inline & F2FS_DATA_EXIST)
+ set_inode_flag(fi, FI_DATA_EXIST);
+}
+
+static inline void set_raw_inline(struct f2fs_inode_info *fi,
+ struct f2fs_inode *ri)
+{
+ ri->i_inline = 0;
+
+ if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+ ri->i_inline |= F2FS_INLINE_XATTR;
+ if (is_inode_flag_set(fi, FI_INLINE_DATA))
+ ri->i_inline |= F2FS_INLINE_DATA;
+ if (is_inode_flag_set(fi, FI_INLINE_DENTRY))
+ ri->i_inline |= F2FS_INLINE_DENTRY;
+ if (is_inode_flag_set(fi, FI_DATA_EXIST))
+ ri->i_inline |= F2FS_DATA_EXIST;
+}
+
+static inline int f2fs_has_inline_xattr(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
+}
+
+static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+{
+ if (f2fs_has_inline_xattr(&fi->vfs_inode))
+ return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
+ return DEF_ADDRS_PER_INODE;
+}
+
+static inline void *inline_xattr_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
+ F2FS_INLINE_XATTR_ADDRS]);
+}
+
+static inline int inline_xattr_size(struct inode *inode)
+{
+ if (f2fs_has_inline_xattr(inode))
+ return F2FS_INLINE_XATTR_ADDRS << 2;
+ else
+ return 0;
+}
+
+static inline int f2fs_has_inline_data(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
+}
+
+static inline void f2fs_clear_inline_inode(struct inode *inode)
+{
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+}
+
+static inline int f2fs_exist_data(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
+}
+
+static inline bool f2fs_is_atomic_file(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
+}
+
+static inline bool f2fs_is_volatile_file(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
+}
+
+static inline bool f2fs_is_drop_cache(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
+}
+
+static inline void *inline_data_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[1]);
+}
+
+static inline int f2fs_has_inline_dentry(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
+}
+
+static inline void *inline_dentry_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[1]);
+}
+
+static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
+{
+ if (!f2fs_has_inline_dentry(dir))
+ kunmap(page);
+}
+
+static inline int f2fs_readonly(struct super_block *sb)
+{
+ return sb->s_flags & MS_RDONLY;
+}
+
+static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
+{
+ return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+}
+
+static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
+{
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ sbi->sb->s_flags |= MS_RDONLY;
+}
+
+static inline struct inode *file_inode(struct file *f)
+{
+ return f->f_path.dentry->d_inode;
+}
+
+#define get_inode_mode(i) \
+ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+/* get offset of first page in next direct node */
+#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
+ ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
+ (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
+ ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+
+/*
+ * file.c
+ */
+int f2fs_sync_file(struct file *, loff_t, loff_t, int);
+void truncate_data_blocks(struct dnode_of_data *);
+int truncate_blocks(struct inode *, u64, bool);
+void f2fs_truncate(struct inode *);
+int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+int f2fs_setattr(struct dentry *, struct iattr *);
+int truncate_hole(struct inode *, pgoff_t, pgoff_t);
+int truncate_data_blocks_range(struct dnode_of_data *, int);
+long f2fs_ioctl(struct file *, unsigned int, unsigned long);
+long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
+
+/*
+ * inode.c
+ */
+void f2fs_set_inode_flags(struct inode *);
+struct inode *f2fs_iget(struct super_block *, unsigned long);
+int try_to_free_nats(struct f2fs_sb_info *, int);
+void update_inode(struct inode *, struct page *);
+void update_inode_page(struct inode *);
+int f2fs_write_inode(struct inode *, struct writeback_control *);
+void f2fs_evict_inode(struct inode *);
+void handle_failed_inode(struct inode *);
+
+/*
+ * namei.c
+ */
+struct dentry *f2fs_get_parent(struct dentry *child);
+
+/*
+ * dir.c
+ */
+extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
+void set_de_type(struct f2fs_dir_entry *, struct inode *);
+struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
+ struct f2fs_dentry_ptr *);
+bool f2fs_fill_dentries(struct file *, void *, filldir_t,
+ struct f2fs_dentry_ptr *, unsigned int, unsigned int);
+void do_make_empty_dir(struct inode *, struct inode *,
+ struct f2fs_dentry_ptr *);
+struct page *init_inode_metadata(struct inode *, struct inode *,
+ const struct qstr *, struct page *);
+void update_parent_metadata(struct inode *, struct inode *, unsigned int);
+int room_for_filename(const void *, int, int);
+void f2fs_drop_nlink(struct inode *, struct inode *, struct page *);
+struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
+ struct page **);
+struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
+ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
+void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
+ struct page *, struct inode *);
+int update_dent_inode(struct inode *, const struct qstr *);
+int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
+void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
+ struct inode *);
+int f2fs_do_tmpfile(struct inode *, struct inode *);
+int f2fs_make_empty(struct inode *, struct inode *);
+bool f2fs_empty_dir(struct inode *);
+
+static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
+{
+ return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
+ inode);
+}
+
+/*
+ * super.c
+ */
+int f2fs_sync_fs(struct super_block *, int);
+extern __printf(3, 4)
+void f2fs_msg(struct super_block *, const char *, const char *, ...);
+
+/*
+ * hash.c
+ */
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
+
+/*
+ * node.c
+ */
+struct dnode_of_data;
+struct node_info;
+
+bool available_free_memory(struct f2fs_sb_info *, int);
+bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+bool has_fsynced_inode(struct f2fs_sb_info *, nid_t);
+bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
+void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
+int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
+int truncate_inode_blocks(struct inode *, pgoff_t);
+int truncate_xattr_node(struct inode *, struct page *);
+int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
+void remove_inode_page(struct inode *);
+struct page *new_inode_page(struct inode *);
+struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
+void ra_node_page(struct f2fs_sb_info *, nid_t);
+struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
+struct page *get_node_page_ra(struct page *, int);
+void sync_inode_page(struct dnode_of_data *);
+int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
+bool alloc_nid(struct f2fs_sb_info *, nid_t *);
+void alloc_nid_done(struct f2fs_sb_info *, nid_t);
+void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
+void recover_inline_xattr(struct inode *, struct page *);
+void recover_xattr_data(struct inode *, struct page *, block_t);
+int recover_inode_page(struct f2fs_sb_info *, struct page *);
+int restore_node_summary(struct f2fs_sb_info *, unsigned int,
+ struct f2fs_summary_block *);
+void flush_nat_entries(struct f2fs_sb_info *);
+int build_node_manager(struct f2fs_sb_info *);
+void destroy_node_manager(struct f2fs_sb_info *);
+int __init create_node_manager_caches(void);
+void destroy_node_manager_caches(void);
+
+/*
+ * segment.c
+ */
+void register_inmem_page(struct inode *, struct page *);
+void commit_inmem_pages(struct inode *, bool);
+void f2fs_balance_fs(struct f2fs_sb_info *);
+void f2fs_balance_fs_bg(struct f2fs_sb_info *);
+int f2fs_issue_flush(struct f2fs_sb_info *);
+int create_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *);
+void invalidate_blocks(struct f2fs_sb_info *, block_t);
+void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
+void clear_prefree_segments(struct f2fs_sb_info *);
+void release_discard_addrs(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *, block_t);
+int npages_for_summary_flush(struct f2fs_sb_info *, bool);
+void allocate_new_segments(struct f2fs_sb_info *);
+int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
+struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
+void write_meta_page(struct f2fs_sb_info *, struct page *);
+void write_node_page(struct f2fs_sb_info *, struct page *,
+ unsigned int, struct f2fs_io_info *);
+void write_data_page(struct page *, struct dnode_of_data *,
+ struct f2fs_io_info *);
+void rewrite_data_page(struct page *, struct f2fs_io_info *);
+void recover_data_page(struct f2fs_sb_info *, struct page *,
+ struct f2fs_summary *, block_t, block_t);
+void allocate_data_block(struct f2fs_sb_info *, struct page *,
+ block_t, block_t *, struct f2fs_summary *, int);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type);
+void write_data_summaries(struct f2fs_sb_info *, block_t);
+void write_node_summaries(struct f2fs_sb_info *, block_t);
+int lookup_journal_in_cursum(struct f2fs_summary_block *,
+ int, unsigned int, int);
+void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
+int build_segment_manager(struct f2fs_sb_info *);
+void destroy_segment_manager(struct f2fs_sb_info *);
+int __init create_segment_manager_caches(void);
+void destroy_segment_manager_caches(void);
+
+/*
+ * checkpoint.c
+ */
+struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
+struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
+void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
+long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void release_dirty_inode(struct f2fs_sb_info *);
+bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
+int acquire_orphan_inode(struct f2fs_sb_info *);
+void release_orphan_inode(struct f2fs_sb_info *);
+void add_orphan_inode(struct f2fs_sb_info *, nid_t);
+void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
+void recover_orphan_inodes(struct f2fs_sb_info *);
+int get_valid_checkpoint(struct f2fs_sb_info *);
+void update_dirty_page(struct inode *, struct page *);
+void add_dirty_dir_inode(struct inode *);
+void remove_dirty_dir_inode(struct inode *);
+void sync_dirty_dir_inodes(struct f2fs_sb_info *);
+void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
+void init_ino_entry_info(struct f2fs_sb_info *);
+int __init create_checkpoint_caches(void);
+void destroy_checkpoint_caches(void);
+
+/*
+ * data.c
+ */
+void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
+int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
+ struct f2fs_io_info *);
+void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
+ struct f2fs_io_info *);
+int reserve_new_block(struct dnode_of_data *);
+int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
+void update_extent_cache(struct dnode_of_data *);
+struct page *find_data_page(struct inode *, pgoff_t, bool);
+struct page *get_lock_data_page(struct inode *, pgoff_t);
+struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
+int do_write_data_page(struct page *, struct f2fs_io_info *);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
+void f2fs_invalidate_page(struct page *, unsigned long);
+int f2fs_release_page(struct page *, gfp_t);
+
+/*
+ * gc.c
+ */
+int start_gc_thread(struct f2fs_sb_info *);
+void stop_gc_thread(struct f2fs_sb_info *);
+block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
+int f2fs_gc(struct f2fs_sb_info *);
+void build_gc_manager(struct f2fs_sb_info *);
+
+/*
+ * recovery.c
+ */
+int recover_fsync_data(struct f2fs_sb_info *);
+bool space_for_roll_forward(struct f2fs_sb_info *);
+
+/*
+ * debug.c
+ */
+#ifdef CONFIG_F2FS_STAT_FS
+struct f2fs_stat_info {
+ struct list_head stat_list;
+ struct f2fs_sb_info *sbi;
+ int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
+ int main_area_segs, main_area_sections, main_area_zones;
+ int hit_ext, total_ext;
+ int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
+ int nats, dirty_nats, sits, dirty_sits, fnids;
+ int total_count, utilization;
+ int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages;
+ unsigned int valid_count, valid_node_count, valid_inode_count;
+ unsigned int bimodal, avg_vblocks;
+ int util_free, util_valid, util_invalid;
+ int rsvd_segs, overp_segs;
+ int dirty_count, node_pages, meta_pages;
+ int prefree_count, call_count, cp_count;
+ int tot_segs, node_segs, data_segs, free_segs, free_secs;
+ int tot_blks, data_blks, node_blks;
+ int curseg[NR_CURSEG_TYPE];
+ int cursec[NR_CURSEG_TYPE];
+ int curzone[NR_CURSEG_TYPE];
+
+ unsigned int segment_count[2];
+ unsigned int block_count[2];
+ unsigned int inplace_count;
+ unsigned base_mem, cache_mem, page_mem;
+};
+
+static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
+{
+ return (struct f2fs_stat_info *)sbi->stat_info;
+}
+
+#define stat_inc_cp_count(si) ((si)->cp_count++)
+#define stat_inc_call_count(si) ((si)->call_count++)
+#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
+#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
+#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
+#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
+#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
+#define stat_inc_inline_inode(inode) \
+ do { \
+ if (f2fs_has_inline_data(inode)) \
+ (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \
+ } while (0)
+#define stat_dec_inline_inode(inode) \
+ do { \
+ if (f2fs_has_inline_data(inode)) \
+ (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \
+ } while (0)
+#define stat_inc_inline_dir(inode) \
+ do { \
+ if (f2fs_has_inline_dentry(inode)) \
+ (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \
+ } while (0)
+#define stat_dec_inline_dir(inode) \
+ do { \
+ if (f2fs_has_inline_dentry(inode)) \
+ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
+ } while (0)
+#define stat_inc_seg_type(sbi, curseg) \
+ ((sbi)->segment_count[(curseg)->alloc_type]++)
+#define stat_inc_block_count(sbi, curseg) \
+ ((sbi)->block_count[(curseg)->alloc_type]++)
+#define stat_inc_inplace_blocks(sbi) \
+ (atomic_inc(&(sbi)->inplace_count))
+#define stat_inc_seg_count(sbi, type) \
+ do { \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
+ (si)->tot_segs++; \
+ if (type == SUM_TYPE_DATA) \
+ si->data_segs++; \
+ else \
+ si->node_segs++; \
+ } while (0)
+
+#define stat_inc_tot_blk_count(si, blks) \
+ (si->tot_blks += (blks))
+
+#define stat_inc_data_blk_count(sbi, blks) \
+ do { \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
+ stat_inc_tot_blk_count(si, blks); \
+ si->data_blks += (blks); \
+ } while (0)
+
+#define stat_inc_node_blk_count(sbi, blks) \
+ do { \
+ struct f2fs_stat_info *si = F2FS_STAT(sbi); \
+ stat_inc_tot_blk_count(si, blks); \
+ si->node_blks += (blks); \
+ } while (0)
+
+int f2fs_build_stats(struct f2fs_sb_info *);
+void f2fs_destroy_stats(struct f2fs_sb_info *);
+void __init f2fs_create_root_stats(void);
+void f2fs_destroy_root_stats(void);
+#else
+#define stat_inc_cp_count(si)
+#define stat_inc_call_count(si)
+#define stat_inc_bggc_count(si)
+#define stat_inc_dirty_dir(sbi)
+#define stat_dec_dirty_dir(sbi)
+#define stat_inc_total_hit(sb)
+#define stat_inc_read_hit(sb)
+#define stat_inc_inline_inode(inode)
+#define stat_dec_inline_inode(inode)
+#define stat_inc_inline_dir(inode)
+#define stat_dec_inline_dir(inode)
+#define stat_inc_seg_type(sbi, curseg)
+#define stat_inc_block_count(sbi, curseg)
+#define stat_inc_inplace_blocks(sbi)
+#define stat_inc_seg_count(si, type)
+#define stat_inc_tot_blk_count(si, blks)
+#define stat_inc_data_blk_count(si, blks)
+#define stat_inc_node_blk_count(sbi, blks)
+
+static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
+static inline void __init f2fs_create_root_stats(void) { }
+static inline void f2fs_destroy_root_stats(void) { }
+#endif
+
+extern const struct file_operations f2fs_dir_operations;
+extern const struct file_operations f2fs_file_operations;
+extern const struct inode_operations f2fs_file_inode_operations;
+extern const struct address_space_operations f2fs_dblock_aops;
+extern const struct address_space_operations f2fs_node_aops;
+extern const struct address_space_operations f2fs_meta_aops;
+extern const struct inode_operations f2fs_dir_inode_operations;
+extern const struct inode_operations f2fs_symlink_inode_operations;
+extern const struct inode_operations f2fs_special_inode_operations;
+extern struct kmem_cache *inode_entry_slab;
+
+/*
+ * inline.c
+ */
+bool f2fs_may_inline(struct inode *);
+void read_inline_data(struct page *, struct page *);
+int f2fs_read_inline_data(struct inode *, struct page *);
+int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
+int f2fs_convert_inline_inode(struct inode *);
+int f2fs_write_inline_data(struct inode *, struct page *);
+bool recover_inline_data(struct inode *, struct page *);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
+ struct page **);
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
+int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
+int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
+ struct inode *, struct inode *);
+bool f2fs_empty_inline_dir(struct inode *);
+int f2fs_read_inline_dir(struct file *, void *, filldir_t);
+#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
new file mode 100644
index 0000000..590651c
--- /dev/null
+++ b/fs/f2fs/file.c
@@ -0,0 +1,1175 @@
+/*
+ * fs/f2fs/file.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/stat.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+#include <linux/falloc.h>
+#include <linux/types.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+#include <linux/mount.h>
+#include <linux/pagevec.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "xattr.h"
+#include "acl.h"
+#include "trace.h"
+#include <trace/events/f2fs.h>
+
+static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct page *page = vmf->page;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct dnode_of_data dn;
+ int err;
+
+ f2fs_balance_fs(sbi);
+
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+ f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
+
+ /* block allocation */
+ f2fs_lock_op(sbi);
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_reserve_block(&dn, page->index);
+ if (err) {
+ f2fs_unlock_op(sbi);
+ goto out;
+ }
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
+
+ file_update_time(vma->vm_file);
+ lock_page(page);
+ if (unlikely(page->mapping != inode->i_mapping ||
+ page_offset(page) > i_size_read(inode) ||
+ !PageUptodate(page))) {
+ unlock_page(page);
+ err = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * check to see if the page is mapped already (no holes)
+ */
+ if (PageMappedToDisk(page))
+ goto mapped;
+
+ /* page is wholly or partially inside EOF */
+ if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
+ unsigned offset;
+ offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
+ zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ }
+ set_page_dirty(page);
+ SetPageUptodate(page);
+
+ trace_f2fs_vm_page_mkwrite(page, DATA);
+mapped:
+ /* fill the page */
+ f2fs_wait_on_page_writeback(page, DATA);
+out:
+ return block_page_mkwrite_return(err);
+}
+
+static const struct vm_operations_struct f2fs_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = f2fs_vm_page_mkwrite,
+};
+
+static int get_parent_ino(struct inode *inode, nid_t *pino)
+{
+ struct dentry *dentry;
+
+ inode = igrab(inode);
+ dentry = d_find_any_alias(inode);
+ iput(inode);
+ if (!dentry)
+ return 0;
+
+ if (update_dent_inode(inode, &dentry->d_name)) {
+ dput(dentry);
+ return 0;
+ }
+
+ *pino = parent_ino(dentry);
+ dput(dentry);
+ return 1;
+}
+
+static inline bool need_do_checkpoint(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ bool need_cp = false;
+
+ if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
+ need_cp = true;
+ else if (file_wrong_pino(inode))
+ need_cp = true;
+ else if (!space_for_roll_forward(sbi))
+ need_cp = true;
+ else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
+ need_cp = true;
+ else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
+ need_cp = true;
+ else if (test_opt(sbi, FASTBOOT))
+ need_cp = true;
+ else if (sbi->active_logs == 2)
+ need_cp = true;
+
+ return need_cp;
+}
+
+static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
+ bool ret = false;
+ /* But we need to avoid that there are some inode updates */
+ if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
+ ret = true;
+ f2fs_put_page(i, 0);
+ return ret;
+}
+
+static void try_to_fix_pino(struct inode *inode)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ nid_t pino;
+
+ down_write(&fi->i_sem);
+ fi->xattr_ver = 0;
+ if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
+ get_parent_ino(inode, &pino)) {
+ fi->i_pino = pino;
+ file_got_pino(inode);
+ up_write(&fi->i_sem);
+
+ mark_inode_dirty_sync(inode);
+ f2fs_write_inode(inode, NULL);
+ } else {
+ up_write(&fi->i_sem);
+ }
+}
+
+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ nid_t ino = inode->i_ino;
+ int ret = 0;
+ bool need_cp = false;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .for_reclaim = 0,
+ };
+
+ if (unlikely(f2fs_readonly(inode->i_sb)))
+ return 0;
+
+ trace_f2fs_sync_file_enter(inode);
+
+ /* if fdatasync is triggered, let's do in-place-update */
+ if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
+ set_inode_flag(fi, FI_NEED_IPU);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ clear_inode_flag(fi, FI_NEED_IPU);
+
+ if (ret) {
+ trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ return ret;
+ }
+
+ /* if the inode is dirty, let's recover all the time */
+ if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
+ update_inode_page(inode);
+ goto go_write;
+ }
+
+ /*
+ * if there is no written data, don't waste time to write recovery info.
+ */
+ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
+ !exist_written_data(sbi, ino, APPEND_INO)) {
+
+ /* it may call write_inode just prior to fsync */
+ if (need_inode_page_update(sbi, ino))
+ goto go_write;
+
+ if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
+ exist_written_data(sbi, ino, UPDATE_INO))
+ goto flush_out;
+ goto out;
+ }
+go_write:
+ /* guarantee free sections for fsync */
+ f2fs_balance_fs(sbi);
+
+ /*
+ * Both of fdatasync() and fsync() are able to be recovered from
+ * sudden-power-off.
+ */
+ down_read(&fi->i_sem);
+ need_cp = need_do_checkpoint(inode);
+ up_read(&fi->i_sem);
+
+ if (need_cp) {
+ /* all the dirty node pages should be flushed for POR */
+ ret = f2fs_sync_fs(inode->i_sb, 1);
+
+ /*
+ * We've secured consistency through sync_fs. Following pino
+ * will be used only for fsynced inodes after checkpoint.
+ */
+ try_to_fix_pino(inode);
+ goto out;
+ }
+sync_nodes:
+ sync_node_pages(sbi, ino, &wbc);
+
+ /* if cp_error was enabled, we should avoid infinite loop */
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto out;
+
+ if (need_inode_block_update(sbi, ino)) {
+ mark_inode_dirty_sync(inode);
+ f2fs_write_inode(inode, NULL);
+ goto sync_nodes;
+ }
+
+ ret = wait_on_node_pages_writeback(sbi, ino);
+ if (ret)
+ goto out;
+
+ /* once recovery info is written, don't need to tack this */
+ remove_dirty_inode(sbi, ino, APPEND_INO);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
+flush_out:
+ remove_dirty_inode(sbi, ino, UPDATE_INO);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
+ ret = f2fs_issue_flush(sbi);
+out:
+ trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ f2fs_trace_ios(NULL, NULL, 1);
+ return ret;
+}
+
+static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+ pgoff_t pgofs, int whence)
+{
+ struct pagevec pvec;
+ int nr_pages;
+
+ if (whence != SEEK_DATA)
+ return 0;
+
+ /* find first dirty page index */
+ pagevec_init(&pvec, 0);
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
+ PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
+ pagevec_release(&pvec);
+ return pgofs;
+}
+
+static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+ int whence)
+{
+ switch (whence) {
+ case SEEK_DATA:
+ if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+ (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ return true;
+ break;
+ case SEEK_HOLE:
+ if (blkaddr == NULL_ADDR)
+ return true;
+ break;
+ }
+ return false;
+}
+
+static inline int unsigned_offsets(struct file *file)
+{
+ return file->f_mode & FMODE_UNSIGNED_OFFSET;
+}
+
+static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
+{
+ if (offset < 0 && !unsigned_offsets(file))
+ return -EINVAL;
+ if (offset > maxsize)
+ return -EINVAL;
+
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ return offset;
+}
+
+static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+ struct dnode_of_data dn;
+ pgoff_t pgofs, end_offset, dirty;
+ loff_t data_ofs = offset;
+ loff_t isize;
+ int err = 0;
+
+ mutex_lock(&inode->i_mutex);
+
+ isize = i_size_read(inode);
+ if (offset >= isize)
+ goto fail;
+
+ /* handle inline data case */
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
+ if (whence == SEEK_HOLE)
+ data_ofs = isize;
+ goto found;
+ }
+
+ pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+
+ dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
+
+ for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+ if (err && err != -ENOENT) {
+ goto fail;
+ } else if (err == -ENOENT) {
+ /* direct node does not exists */
+ if (whence == SEEK_DATA) {
+ pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
+ F2FS_I(inode));
+ continue;
+ } else {
+ goto found;
+ }
+ }
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+
+ /* find data/hole in dnode block */
+ for (; dn.ofs_in_node < end_offset;
+ dn.ofs_in_node++, pgofs++,
+ data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+ block_t blkaddr;
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+ if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ f2fs_put_dnode(&dn);
+ goto found;
+ }
+ }
+ f2fs_put_dnode(&dn);
+ }
+
+ if (whence == SEEK_DATA)
+ goto fail;
+found:
+ if (whence == SEEK_HOLE && data_ofs > isize)
+ data_ofs = isize;
+ mutex_unlock(&inode->i_mutex);
+ return vfs_setpos(file, data_ofs, maxbytes);
+fail:
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+}
+
+static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ case SEEK_END:
+ return generic_file_llseek_size(file, offset, whence,
+ maxbytes);
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ if (offset < 0)
+ return -ENXIO;
+ return f2fs_seek_block(file, offset, whence);
+ }
+
+ return -EINVAL;
+}
+
+static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *inode = file_inode(file);
+
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ int err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
+ file_accessed(file);
+ vma->vm_ops = &f2fs_file_vm_ops;
+ return 0;
+}
+
+int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
+{
+ int nr_free = 0, ofs = dn->ofs_in_node;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct f2fs_node *raw_node;
+ __le32 *addr;
+
+ raw_node = F2FS_NODE(dn->node_page);
+ addr = blkaddr_in_node(raw_node) + ofs;
+
+ for (; count > 0; count--, addr++, dn->ofs_in_node++) {
+ block_t blkaddr = le32_to_cpu(*addr);
+ if (blkaddr == NULL_ADDR)
+ continue;
+
+ dn->data_blkaddr = NULL_ADDR;
+ update_extent_cache(dn);
+ invalidate_blocks(sbi, blkaddr);
+ nr_free++;
+ }
+ if (nr_free) {
+ dec_valid_block_count(sbi, dn->inode, nr_free);
+ set_page_dirty(dn->node_page);
+ sync_inode_page(dn);
+ }
+ dn->ofs_in_node = ofs;
+
+ trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
+ dn->ofs_in_node, nr_free);
+ return nr_free;
+}
+
+void truncate_data_blocks(struct dnode_of_data *dn)
+{
+ truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+}
+
+static int truncate_partial_data_page(struct inode *inode, u64 from)
+{
+ unsigned offset = from & (PAGE_CACHE_SIZE - 1);
+ struct page *page;
+
+ if (!offset)
+ return 0;
+
+ page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
+ if (IS_ERR(page))
+ return 0;
+
+ lock_page(page);
+ if (unlikely(!PageUptodate(page) ||
+ page->mapping != inode->i_mapping))
+ goto out;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user(page, offset, PAGE_CACHE_SIZE - offset);
+ set_page_dirty(page);
+out:
+ f2fs_put_page(page, 1);
+ return 0;
+}
+
+int truncate_blocks(struct inode *inode, u64 from, bool lock)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int blocksize = inode->i_sb->s_blocksize;
+ struct dnode_of_data dn;
+ pgoff_t free_from;
+ int count = 0, err = 0;
+ struct page *ipage;
+
+ trace_f2fs_truncate_blocks_enter(inode, from);
+
+ free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);
+
+ if (lock)
+ f2fs_lock_op(sbi);
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ goto out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
+ if (err) {
+ if (err == -ENOENT)
+ goto free_next;
+ goto out;
+ }
+
+ count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+
+ count -= dn.ofs_in_node;
+ f2fs_bug_on(sbi, count < 0);
+
+ if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
+ truncate_data_blocks_range(&dn, count);
+ free_from += count;
+ }
+
+ f2fs_put_dnode(&dn);
+free_next:
+ err = truncate_inode_blocks(inode, free_from);
+out:
+ if (lock)
+ f2fs_unlock_op(sbi);
+
+ /* lastly zero out the first data page */
+ if (!err)
+ err = truncate_partial_data_page(inode, from);
+
+ trace_f2fs_truncate_blocks_exit(inode, err);
+ return err;
+}
+
+void f2fs_truncate(struct inode *inode)
+{
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return;
+
+ trace_f2fs_truncate(inode);
+
+ /* we should check inline_data size */
+ if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) {
+ if (f2fs_convert_inline_inode(inode))
+ return;
+ }
+
+ if (!truncate_blocks(inode, i_size_read(inode), true)) {
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ }
+}
+
+int f2fs_getattr(struct vfsmount *mnt,
+ struct dentry *dentry, struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ generic_fillattr(inode, stat);
+ stat->blocks <<= 3;
+ return 0;
+}
+
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+static void __setattr_copy(struct inode *inode, const struct iattr *attr)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned int ia_valid = attr->ia_valid;
+
+ if (ia_valid & ATTR_UID)
+ inode->i_uid = attr->ia_uid;
+ if (ia_valid & ATTR_GID)
+ inode->i_gid = attr->ia_gid;
+ if (ia_valid & ATTR_ATIME)
+ inode->i_atime = timespec_trunc(attr->ia_atime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_MTIME)
+ inode->i_mtime = timespec_trunc(attr->ia_mtime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_CTIME)
+ inode->i_ctime = timespec_trunc(attr->ia_ctime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ mode &= ~S_ISGID;
+ set_acl_inode(fi, mode);
+ }
+}
+#else
+#define __setattr_copy setattr_copy
+#endif
+
+int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ int err;
+
+ err = inode_change_ok(inode, attr);
+ if (err)
+ return err;
+
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (attr->ia_size != i_size_read(inode)) {
+ truncate_setsize(inode, attr->ia_size);
+ f2fs_truncate(inode);
+ f2fs_balance_fs(F2FS_I_SB(inode));
+ } else {
+ /*
+ * giving a chance to truncate blocks past EOF which
+ * are fallocated with FALLOC_FL_KEEP_SIZE.
+ */
+ f2fs_truncate(inode);
+ }
+ }
+
+ __setattr_copy(inode, attr);
+
+ if (attr->ia_valid & ATTR_MODE) {
+ err = f2fs_acl_chmod(inode);
+ if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
+ inode->i_mode = fi->i_acl_mode;
+ clear_inode_flag(fi, FI_ACL_MODE);
+ }
+ }
+
+ mark_inode_dirty(inode);
+ return err;
+}
+
+const struct inode_operations f2fs_file_inode_operations = {
+ .getattr = f2fs_getattr,
+ .setattr = f2fs_setattr,
+ .get_acl = f2fs_get_acl,
+#ifdef CONFIG_F2FS_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = f2fs_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+ .fiemap = f2fs_fiemap,
+};
+
+static void fill_zero(struct inode *inode, pgoff_t index,
+ loff_t start, loff_t len)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct page *page;
+
+ if (!len)
+ return;
+
+ f2fs_balance_fs(sbi);
+
+ f2fs_lock_op(sbi);
+ page = get_new_data_page(inode, NULL, index, false);
+ f2fs_unlock_op(sbi);
+
+ if (!IS_ERR(page)) {
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user(page, start, len);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
+}
+
+int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
+{
+ pgoff_t index;
+ int err;
+
+ for (index = pg_start; index < pg_end; index++) {
+ struct dnode_of_data dn;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err) {
+ if (err == -ENOENT)
+ continue;
+ return err;
+ }
+
+ if (dn.data_blkaddr != NULL_ADDR)
+ truncate_data_blocks_range(&dn, 1);
+ f2fs_put_dnode(&dn);
+ }
+ return 0;
+}
+
+static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
+ pgoff_t pg_start, pg_end;
+ loff_t off_start, off_end;
+ int ret = 0;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ /* skip punching hole beyond i_size */
+ if (offset >= inode->i_size)
+ return ret;
+
+ if (f2fs_has_inline_data(inode)) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+
+ pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
+ pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+
+ off_start = offset & (PAGE_CACHE_SIZE - 1);
+ off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+
+ if (pg_start == pg_end) {
+ fill_zero(inode, pg_start, off_start,
+ off_end - off_start);
+ } else {
+ if (off_start)
+ fill_zero(inode, pg_start++, off_start,
+ PAGE_CACHE_SIZE - off_start);
+ if (off_end)
+ fill_zero(inode, pg_end, 0, off_end);
+
+ if (pg_start < pg_end) {
+ struct address_space *mapping = inode->i_mapping;
+ loff_t blk_start, blk_end;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ f2fs_balance_fs(sbi);
+
+ blk_start = pg_start << PAGE_CACHE_SHIFT;
+ blk_end = pg_end << PAGE_CACHE_SHIFT;
+ truncate_inode_pages_range(mapping, blk_start,
+ blk_end - 1);
+
+ f2fs_lock_op(sbi);
+ ret = truncate_hole(inode, pg_start, pg_end);
+ f2fs_unlock_op(sbi);
+ }
+ }
+
+ return ret;
+}
+
+static int expand_inode_data(struct inode *inode, loff_t offset,
+ loff_t len, int mode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ pgoff_t index, pg_start, pg_end;
+ loff_t new_size = i_size_read(inode);
+ loff_t off_start, off_end;
+ int ret = 0;
+
+ f2fs_balance_fs(sbi);
+
+ ret = inode_newsize_ok(inode, (len + offset));
+ if (ret)
+ return ret;
+
+ if (f2fs_has_inline_data(inode)) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+
+ pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
+ pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+
+ off_start = offset & (PAGE_CACHE_SIZE - 1);
+ off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+
+ f2fs_lock_op(sbi);
+
+ for (index = pg_start; index <= pg_end; index++) {
+ struct dnode_of_data dn;
+
+ if (index == pg_end && !off_end)
+ goto noalloc;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_reserve_block(&dn, index);
+ if (ret)
+ break;
+noalloc:
+ if (pg_start == pg_end)
+ new_size = offset + len;
+ else if (index == pg_start && off_start)
+ new_size = (index + 1) << PAGE_CACHE_SHIFT;
+ else if (index == pg_end)
+ new_size = (index << PAGE_CACHE_SHIFT) + off_end;
+ else
+ new_size += PAGE_CACHE_SIZE;
+ }
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ i_size_read(inode) < new_size) {
+ i_size_write(inode, new_size);
+ mark_inode_dirty(inode);
+ update_inode_page(inode);
+ }
+ f2fs_unlock_op(sbi);
+
+ return ret;
+}
+
+static long f2fs_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ long ret;
+
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ ret = punch_hole(inode, offset, len);
+ else
+ ret = expand_inode_data(inode, offset, len, mode);
+
+ if (!ret) {
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ }
+
+ mutex_unlock(&inode->i_mutex);
+
+ trace_f2fs_fallocate(inode, mode, offset, len, ret);
+ return ret;
+}
+
+static int f2fs_release_file(struct inode *inode, struct file *filp)
+{
+ /* some remained atomic pages should discarded */
+ if (f2fs_is_atomic_file(inode))
+ commit_inmem_pages(inode, true);
+ if (f2fs_is_volatile_file(inode)) {
+ set_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
+ filemap_fdatawrite(inode->i_mapping);
+ clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
+ }
+ return 0;
+}
+
+#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
+#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
+
+static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
+{
+ if (S_ISDIR(mode))
+ return flags;
+ else if (S_ISREG(mode))
+ return flags & F2FS_REG_FLMASK;
+ else
+ return flags & F2FS_OTHER_FLMASK;
+}
+
+static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
+ return put_user(flags, (int __user *)arg);
+}
+
+static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
+ unsigned int oldflags;
+ int ret;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ if (!inode_owner_or_capable(inode)) {
+ ret = -EACCES;
+ goto out;
+ }
+
+ if (get_user(flags, (int __user *)arg)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ flags = f2fs_mask_flags(inode->i_mode, flags);
+
+ mutex_lock(&inode->i_mutex);
+
+ oldflags = fi->i_flags;
+
+ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+ if (!capable(CAP_LINUX_IMMUTABLE)) {
+ mutex_unlock(&inode->i_mutex);
+ ret = -EPERM;
+ goto out;
+ }
+ }
+
+ flags = flags & FS_FL_USER_MODIFIABLE;
+ flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
+ fi->i_flags = flags;
+ mutex_unlock(&inode->i_mutex);
+
+ f2fs_set_inode_flags(inode);
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+out:
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
+static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+
+ return put_user(inode->i_generation, (int __user *)arg);
+}
+
+static int f2fs_ioc_start_atomic_write(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ f2fs_balance_fs(F2FS_I_SB(inode));
+
+ if (f2fs_is_atomic_file(inode))
+ return 0;
+
+ set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+
+ return f2fs_convert_inline_inode(inode);
+}
+
+static int f2fs_ioc_commit_atomic_write(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ int ret;
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ if (f2fs_is_volatile_file(inode))
+ return 0;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ if (f2fs_is_atomic_file(inode))
+ commit_inmem_pages(inode, false);
+
+ ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
+ mnt_drop_write_file(filp);
+ clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+ return ret;
+}
+
+static int f2fs_ioc_start_volatile_write(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ if (f2fs_is_volatile_file(inode))
+ return 0;
+
+ set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+
+ return f2fs_convert_inline_inode(inode);
+}
+
+static int f2fs_ioc_release_volatile_write(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ if (!f2fs_is_volatile_file(inode))
+ return 0;
+
+ punch_hole(inode, 0, F2FS_BLKSIZE);
+ return 0;
+}
+
+static int f2fs_ioc_abort_volatile_write(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+ int ret;
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ f2fs_balance_fs(F2FS_I_SB(inode));
+
+ if (f2fs_is_atomic_file(inode)) {
+ commit_inmem_pages(inode, false);
+ clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
+ }
+
+ if (f2fs_is_volatile_file(inode)) {
+ clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+ filemap_fdatawrite(inode->i_mapping);
+ set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
+ }
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
+static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct super_block *sb = sbi->sb;
+ __u32 in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(in, (__u32 __user *)arg))
+ return -EFAULT;
+
+ switch (in) {
+ case FS_GOING_DOWN_FULLSYNC:
+ sb = freeze_bdev(sb->s_bdev);
+ if (sb && !IS_ERR(sb)) {
+ f2fs_stop_checkpoint(sbi);
+ thaw_bdev(sb->s_bdev, sb);
+ }
+ break;
+ case FS_GOING_DOWN_METASYNC:
+ /* do checkpoint only */
+ f2fs_sync_fs(sb, 1);
+ f2fs_stop_checkpoint(sbi);
+ break;
+ case FS_GOING_DOWN_NOSYNC:
+ f2fs_stop_checkpoint(sbi);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct super_block *sb = inode->i_sb;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ struct fstrim_range range;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&range, (struct fstrim_range __user *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max((unsigned int)range.minlen,
+ q->limits.discard_granularity);
+ ret = f2fs_trim_fs(F2FS_SB(sb), &range);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user((struct fstrim_range __user *)arg, &range,
+ sizeof(range)))
+ return -EFAULT;
+ return 0;
+}
+
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case F2FS_IOC_GETFLAGS:
+ return f2fs_ioc_getflags(filp, arg);
+ case F2FS_IOC_SETFLAGS:
+ return f2fs_ioc_setflags(filp, arg);
+ case F2FS_IOC_GETVERSION:
+ return f2fs_ioc_getversion(filp, arg);
+ case F2FS_IOC_START_ATOMIC_WRITE:
+ return f2fs_ioc_start_atomic_write(filp);
+ case F2FS_IOC_COMMIT_ATOMIC_WRITE:
+ return f2fs_ioc_commit_atomic_write(filp);
+ case F2FS_IOC_START_VOLATILE_WRITE:
+ return f2fs_ioc_start_volatile_write(filp);
+ case F2FS_IOC_RELEASE_VOLATILE_WRITE:
+ return f2fs_ioc_release_volatile_write(filp);
+ case F2FS_IOC_ABORT_VOLATILE_WRITE:
+ return f2fs_ioc_abort_volatile_write(filp);
+ case FS_IOC_SHUTDOWN:
+ return f2fs_ioc_shutdown(filp, arg);
+ case FITRIM:
+ return f2fs_ioc_fitrim(filp, arg);
+ default:
+ return -ENOTTY;
+ }
+}
+
+#ifdef CONFIG_COMPAT
+long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case F2FS_IOC32_GETFLAGS:
+ cmd = F2FS_IOC_GETFLAGS;
+ break;
+ case F2FS_IOC32_SETFLAGS:
+ cmd = F2FS_IOC_SETFLAGS;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+ return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif
+
+const struct file_operations f2fs_file_operations = {
+ .llseek = f2fs_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
+ .open = generic_file_open,
+ .release = f2fs_release_file,
+ .mmap = f2fs_file_mmap,
+ .fsync = f2fs_sync_file,
+ .fallocate = f2fs_fallocate,
+ .unlocked_ioctl = f2fs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = f2fs_compat_ioctl,
+#endif
+ .splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
+};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 0000000..5d6af02
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,746 @@
+/*
+ * fs/f2fs/gc.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/f2fs_fs.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/blkdev.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "gc.h"
+#include <trace/events/f2fs.h>
+
+static int gc_thread_func(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+ wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+ long wait_ms;
+
+ wait_ms = gc_th->min_sleep_time;
+
+ do {
+ if (try_to_freeze())
+ continue;
+ else
+ wait_event_interruptible_timeout(*wq,
+ kthread_should_stop(),
+ msecs_to_jiffies(wait_ms));
+ if (kthread_should_stop())
+ break;
+
+ if (sbi->sb->s_frozen >= SB_FREEZE_WRITE) {
+ increase_sleep_time(gc_th, &wait_ms);
+ continue;
+ }
+
+ /*
+ * [GC triggering condition]
+ * 0. GC is not conducted currently.
+ * 1. There are enough dirty segments.
+ * 2. IO subsystem is idle by checking the # of writeback pages.
+ * 3. IO subsystem is idle by checking the # of requests in
+ * bdev's request list.
+ *
+ * Note) We have to avoid triggering GCs frequently.
+ * Because it is possible that some segments can be
+ * invalidated soon after by user update or deletion.
+ * So, I'd like to wait some time to collect dirty segments.
+ */
+ if (!mutex_trylock(&sbi->gc_mutex))
+ continue;
+
+ if (!is_idle(sbi)) {
+ increase_sleep_time(gc_th, &wait_ms);
+ mutex_unlock(&sbi->gc_mutex);
+ continue;
+ }
+
+ if (has_enough_invalid_blocks(sbi))
+ decrease_sleep_time(gc_th, &wait_ms);
+ else
+ increase_sleep_time(gc_th, &wait_ms);
+
+ stat_inc_bggc_count(sbi);
+
+ /* if return value is not zero, no victim was selected */
+ if (f2fs_gc(sbi))
+ wait_ms = gc_th->no_gc_sleep_time;
+
+ /* balancing f2fs's metadata periodically */
+ f2fs_balance_fs_bg(sbi);
+
+ } while (!kthread_should_stop());
+ return 0;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_gc_kthread *gc_th;
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ int err = 0;
+
+ gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
+ if (!gc_th) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
+ gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
+ gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
+
+ gc_th->gc_idle = 0;
+
+ sbi->gc_thread = gc_th;
+ init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
+ sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+ "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(gc_th->f2fs_gc_task)) {
+ err = PTR_ERR(gc_th->f2fs_gc_task);
+ kfree(gc_th);
+ sbi->gc_thread = NULL;
+ }
+out:
+ return err;
+}
+
+void stop_gc_thread(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+ if (!gc_th)
+ return;
+ kthread_stop(gc_th->f2fs_gc_task);
+ kfree(gc_th);
+ sbi->gc_thread = NULL;
+}
+
+static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
+{
+ int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+
+ if (gc_th && gc_th->gc_idle) {
+ if (gc_th->gc_idle == 1)
+ gc_mode = GC_CB;
+ else if (gc_th->gc_idle == 2)
+ gc_mode = GC_GREEDY;
+ }
+ return gc_mode;
+}
+
+static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
+ int type, struct victim_sel_policy *p)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+ if (p->alloc_mode == SSR) {
+ p->gc_mode = GC_GREEDY;
+ p->dirty_segmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
+ p->ofs_unit = 1;
+ } else {
+ p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
+ p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
+ p->max_search = dirty_i->nr_dirty[DIRTY];
+ p->ofs_unit = sbi->segs_per_sec;
+ }
+
+ if (p->max_search > sbi->max_victim_search)
+ p->max_search = sbi->max_victim_search;
+
+ p->offset = sbi->last_victim[p->gc_mode];
+}
+
+static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ /* SSR allocates in a segment unit */
+ if (p->alloc_mode == SSR)
+ return 1 << sbi->log_blocks_per_seg;
+ if (p->gc_mode == GC_GREEDY)
+ return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
+ else if (p->gc_mode == GC_CB)
+ return UINT_MAX;
+ else /* No other gc_mode */
+ return 0;
+}
+
+static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int secno;
+
+ /*
+ * If the gc_type is FG_GC, we can select victim segments
+ * selected by background GC before.
+ * Those segments guarantee they have small valid blocks.
+ */
+ for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
+ if (sec_usage_check(sbi, secno))
+ continue;
+ clear_bit(secno, dirty_i->victim_secmap);
+ return secno * sbi->segs_per_sec;
+ }
+ return NULL_SEGNO;
+}
+
+static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int secno = GET_SECNO(sbi, segno);
+ unsigned int start = secno * sbi->segs_per_sec;
+ unsigned long long mtime = 0;
+ unsigned int vblocks;
+ unsigned char age = 0;
+ unsigned char u;
+ unsigned int i;
+
+ for (i = 0; i < sbi->segs_per_sec; i++)
+ mtime += get_seg_entry(sbi, start + i)->mtime;
+ vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+
+ mtime = div_u64(mtime, sbi->segs_per_sec);
+ vblocks = div_u64(vblocks, sbi->segs_per_sec);
+
+ u = (vblocks * 100) >> sbi->log_blocks_per_seg;
+
+ /* Handle if the system time has changed by the user */
+ if (mtime < sit_i->min_mtime)
+ sit_i->min_mtime = mtime;
+ if (mtime > sit_i->max_mtime)
+ sit_i->max_mtime = mtime;
+ if (sit_i->max_mtime != sit_i->min_mtime)
+ age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
+ sit_i->max_mtime - sit_i->min_mtime);
+
+ return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
+}
+
+static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct victim_sel_policy *p)
+{
+ if (p->alloc_mode == SSR)
+ return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+
+ /* alloc_mode == LFS */
+ if (p->gc_mode == GC_GREEDY)
+ return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+ else
+ return get_cb_cost(sbi, segno);
+}
+
+/*
+ * This function is called from two paths.
+ * One is garbage collection and the other is SSR segment selection.
+ * When it is called during GC, it just gets a victim segment
+ * and it does not remove it from dirty seglist.
+ * When it is called from SSR segment selection, it finds a segment
+ * which has minimum valid blocks and removes it from dirty seglist.
+ */
+static int get_victim_by_default(struct f2fs_sb_info *sbi,
+ unsigned int *result, int gc_type, int type, char alloc_mode)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ struct victim_sel_policy p;
+ unsigned int secno, max_cost;
+ int nsearched = 0;
+
+ mutex_lock(&dirty_i->seglist_lock);
+
+ p.alloc_mode = alloc_mode;
+ select_policy(sbi, gc_type, type, &p);
+
+ p.min_segno = NULL_SEGNO;
+ p.min_cost = max_cost = get_max_cost(sbi, &p);
+
+ if (p.alloc_mode == LFS && gc_type == FG_GC) {
+ p.min_segno = check_bg_victims(sbi);
+ if (p.min_segno != NULL_SEGNO)
+ goto got_it;
+ }
+
+ while (1) {
+ unsigned long cost;
+ unsigned int segno;
+
+ segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset);
+ if (segno >= MAIN_SEGS(sbi)) {
+ if (sbi->last_victim[p.gc_mode]) {
+ sbi->last_victim[p.gc_mode] = 0;
+ p.offset = 0;
+ continue;
+ }
+ break;
+ }
+
+ p.offset = segno + p.ofs_unit;
+ if (p.ofs_unit > 1)
+ p.offset -= segno % p.ofs_unit;
+
+ secno = GET_SECNO(sbi, segno);
+
+ if (sec_usage_check(sbi, secno))
+ continue;
+ if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
+ continue;
+
+ cost = get_gc_cost(sbi, segno, &p);
+
+ if (p.min_cost > cost) {
+ p.min_segno = segno;
+ p.min_cost = cost;
+ } else if (unlikely(cost == max_cost)) {
+ continue;
+ }
+
+ if (nsearched++ >= p.max_search) {
+ sbi->last_victim[p.gc_mode] = segno;
+ break;
+ }
+ }
+ if (p.min_segno != NULL_SEGNO) {
+got_it:
+ if (p.alloc_mode == LFS) {
+ secno = GET_SECNO(sbi, p.min_segno);
+ if (gc_type == FG_GC)
+ sbi->cur_victim_sec = secno;
+ else
+ set_bit(secno, dirty_i->victim_secmap);
+ }
+ *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
+
+ trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
+ sbi->cur_victim_sec,
+ prefree_segments(sbi), free_segments(sbi));
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ return (p.min_segno == NULL_SEGNO) ? 0 : 1;
+}
+
+static const struct victim_selection default_v_ops = {
+ .get_victim = get_victim_by_default,
+};
+
+static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
+{
+ struct inode_entry *ie;
+
+ ie = radix_tree_lookup(&gc_list->iroot, ino);
+ if (ie)
+ return ie->inode;
+ return NULL;
+}
+
+static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
+{
+ struct inode_entry *new_ie;
+
+ if (inode == find_gc_inode(gc_list, inode->i_ino)) {
+ iput(inode);
+ return;
+ }
+ new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ new_ie->inode = inode;
+
+ f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
+ list_add_tail(&new_ie->list, &gc_list->ilist);
+}
+
+static void put_gc_inode(struct gc_inode_list *gc_list)
+{
+ struct inode_entry *ie, *next_ie;
+ list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
+ radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
+ iput(ie->inode);
+ list_del(&ie->list);
+ kmem_cache_free(inode_entry_slab, ie);
+ }
+}
+
+static int check_valid_map(struct f2fs_sb_info *sbi,
+ unsigned int segno, int offset)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct seg_entry *sentry;
+ int ret;
+
+ mutex_lock(&sit_i->sentry_lock);
+ sentry = get_seg_entry(sbi, segno);
+ ret = f2fs_test_bit(offset, sentry->cur_valid_map);
+ mutex_unlock(&sit_i->sentry_lock);
+ return ret;
+}
+
+/*
+ * This function compares node address got in summary with that in NAT.
+ * On validity, copy that node with cold status, otherwise (invalid node)
+ * ignore that.
+ */
+static void gc_node_segment(struct f2fs_sb_info *sbi,
+ struct f2fs_summary *sum, unsigned int segno, int gc_type)
+{
+ bool initial = true;
+ struct f2fs_summary *entry;
+ int off;
+
+next_step:
+ entry = sum;
+
+ for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ nid_t nid = le32_to_cpu(entry->nid);
+ struct page *node_page;
+
+ /* stop BG_GC if there is not enough free sections. */
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
+ return;
+
+ if (check_valid_map(sbi, segno, off) == 0)
+ continue;
+
+ if (initial) {
+ ra_node_page(sbi, nid);
+ continue;
+ }
+ node_page = get_node_page(sbi, nid);
+ if (IS_ERR(node_page))
+ continue;
+
+ /* block may become invalid during get_node_page */
+ if (check_valid_map(sbi, segno, off) == 0) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+
+ /* set page dirty and write it */
+ if (gc_type == FG_GC) {
+ f2fs_wait_on_page_writeback(node_page, NODE);
+ set_page_dirty(node_page);
+ } else {
+ if (!PageWriteback(node_page))
+ set_page_dirty(node_page);
+ }
+ f2fs_put_page(node_page, 1);
+ stat_inc_node_blk_count(sbi, 1);
+ }
+
+ if (initial) {
+ initial = false;
+ goto next_step;
+ }
+
+ if (gc_type == FG_GC) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .for_reclaim = 0,
+ };
+ sync_node_pages(sbi, 0, &wbc);
+
+ /*
+ * In the case of FG_GC, it'd be better to reclaim this victim
+ * completely.
+ */
+ if (get_valid_blocks(sbi, segno, 1) != 0)
+ goto next_step;
+ }
+}
+
+/*
+ * Calculate start block index indicating the given node offset.
+ * Be careful, caller should give this node offset only indicating direct node
+ * blocks. If any node offsets, which point the other types of node blocks such
+ * as indirect or double indirect node blocks, are given, it must be a caller's
+ * bug.
+ */
+block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
+{
+ unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
+ unsigned int bidx;
+
+ if (node_ofs == 0)
+ return 0;
+
+ if (node_ofs <= 2) {
+ bidx = node_ofs - 1;
+ } else if (node_ofs <= indirect_blks) {
+ int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
+ bidx = node_ofs - 2 - dec;
+ } else {
+ int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
+ bidx = node_ofs - 5 - dec;
+ }
+ return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
+}
+
+static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ struct node_info *dni, block_t blkaddr, unsigned int *nofs)
+{
+ struct page *node_page;
+ nid_t nid;
+ unsigned int ofs_in_node;
+ block_t source_blkaddr;
+
+ nid = le32_to_cpu(sum->nid);
+ ofs_in_node = le16_to_cpu(sum->ofs_in_node);
+
+ node_page = get_node_page(sbi, nid);
+ if (IS_ERR(node_page))
+ return 0;
+
+ get_node_info(sbi, nid, dni);
+
+ if (sum->version != dni->version) {
+ f2fs_put_page(node_page, 1);
+ return 0;
+ }
+
+ *nofs = ofs_of_node(node_page);
+ source_blkaddr = datablock_addr(node_page, ofs_in_node);
+ f2fs_put_page(node_page, 1);
+
+ if (source_blkaddr != blkaddr)
+ return 0;
+ return 1;
+}
+
+static void move_data_page(struct inode *inode, struct page *page, int gc_type)
+{
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC,
+ };
+
+ if (gc_type == BG_GC) {
+ if (PageWriteback(page))
+ goto out;
+ set_page_dirty(page);
+ set_cold_data(page);
+ } else {
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ if (clear_page_dirty_for_io(page))
+ inode_dec_dirty_pages(inode);
+ set_cold_data(page);
+ do_write_data_page(page, &fio);
+ clear_cold_data(page);
+ }
+out:
+ f2fs_put_page(page, 1);
+}
+
+/*
+ * This function tries to get parent node of victim data block, and identifies
+ * data block validity. If the block is valid, copy that with cold status and
+ * modify parent node.
+ * If the parent node is not valid or the data block address is different,
+ * the victim data block is ignored.
+ */
+static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+ struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
+{
+ struct super_block *sb = sbi->sb;
+ struct f2fs_summary *entry;
+ block_t start_addr;
+ int off;
+ int phase = 0;
+
+ start_addr = START_BLOCK(sbi, segno);
+
+next_step:
+ entry = sum;
+
+ for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ struct page *data_page;
+ struct inode *inode;
+ struct node_info dni; /* dnode info for the data */
+ unsigned int ofs_in_node, nofs;
+ block_t start_bidx;
+
+ /* stop BG_GC if there is not enough free sections. */
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
+ return;
+
+ if (check_valid_map(sbi, segno, off) == 0)
+ continue;
+
+ if (phase == 0) {
+ ra_node_page(sbi, le32_to_cpu(entry->nid));
+ continue;
+ }
+
+ /* Get an inode by ino with checking validity */
+ if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0)
+ continue;
+
+ if (phase == 1) {
+ ra_node_page(sbi, dni.ino);
+ continue;
+ }
+
+ ofs_in_node = le16_to_cpu(entry->ofs_in_node);
+
+ if (phase == 2) {
+ inode = f2fs_iget(sb, dni.ino);
+ if (IS_ERR(inode) || is_bad_inode(inode))
+ continue;
+
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+
+ data_page = find_data_page(inode,
+ start_bidx + ofs_in_node, false);
+ if (IS_ERR(data_page)) {
+ iput(inode);
+ continue;
+ }
+
+ f2fs_put_page(data_page, 0);
+ add_gc_inode(gc_list, inode);
+ continue;
+ }
+
+ /* phase 3 */
+ inode = find_gc_inode(gc_list, dni.ino);
+ if (inode) {
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+ data_page = get_lock_data_page(inode,
+ start_bidx + ofs_in_node);
+ if (IS_ERR(data_page))
+ continue;
+ move_data_page(inode, data_page, gc_type);
+ stat_inc_data_blk_count(sbi, 1);
+ }
+ }
+
+ if (++phase < 4)
+ goto next_step;
+
+ if (gc_type == FG_GC) {
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+
+ /*
+ * In the case of FG_GC, it'd be better to reclaim this victim
+ * completely.
+ */
+ if (get_valid_blocks(sbi, segno, 1) != 0) {
+ phase = 2;
+ goto next_step;
+ }
+ }
+}
+
+static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
+ int gc_type)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ int ret;
+
+ mutex_lock(&sit_i->sentry_lock);
+ ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
+ NO_CHECK_TYPE, LFS);
+ mutex_unlock(&sit_i->sentry_lock);
+ return ret;
+}
+
+static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
+ struct gc_inode_list *gc_list, int gc_type)
+{
+ struct page *sum_page;
+ struct f2fs_summary_block *sum;
+ struct blk_plug plug;
+
+ /* read segment summary of victim */
+ sum_page = get_sum_page(sbi, segno);
+
+ blk_start_plug(&plug);
+
+ sum = page_address(sum_page);
+
+ switch (GET_SUM_TYPE((&sum->footer))) {
+ case SUM_TYPE_NODE:
+ gc_node_segment(sbi, sum->entries, segno, gc_type);
+ break;
+ case SUM_TYPE_DATA:
+ gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type);
+ break;
+ }
+ blk_finish_plug(&plug);
+
+ stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
+ stat_inc_call_count(sbi->stat_info);
+
+ f2fs_put_page(sum_page, 1);
+}
+
+int f2fs_gc(struct f2fs_sb_info *sbi)
+{
+ unsigned int segno, i;
+ int gc_type = BG_GC;
+ int nfree = 0;
+ int ret = -1;
+ struct cp_control cpc;
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(GFP_NOFS),
+ };
+
+ cpc.reason = __get_cp_reason(sbi);
+gc_more:
+ if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+ goto stop;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto stop;
+
+ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
+ gc_type = FG_GC;
+ write_checkpoint(sbi, &cpc);
+ }
+
+ if (!__get_victim(sbi, &segno, gc_type))
+ goto stop;
+ ret = 0;
+
+ /* readahead multi ssa blocks those have contiguous address */
+ if (sbi->segs_per_sec > 1)
+ ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
+ META_SSA);
+
+ for (i = 0; i < sbi->segs_per_sec; i++)
+ do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
+
+ if (gc_type == FG_GC) {
+ sbi->cur_victim_sec = NULL_SEGNO;
+ nfree++;
+ WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec));
+ }
+
+ if (has_not_enough_free_secs(sbi, nfree))
+ goto gc_more;
+
+ if (gc_type == FG_GC)
+ write_checkpoint(sbi, &cpc);
+stop:
+ mutex_unlock(&sbi->gc_mutex);
+
+ put_gc_inode(&gc_list);
+ return ret;
+}
+
+void build_gc_manager(struct f2fs_sb_info *sbi)
+{
+ DIRTY_I(sbi)->v_ops = &default_v_ops;
+}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
new file mode 100644
index 0000000..9091e0c
--- /dev/null
+++ b/fs/f2fs/gc.h
@@ -0,0 +1,110 @@
+/*
+ * fs/f2fs/gc.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define GC_THREAD_MIN_WB_PAGES 1 /*
+ * a threshold to determine
+ * whether IO subsystem is idle
+ * or not
+ */
+#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
+#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
+#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
+#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
+
+/* Search max. number of dirty segments to select a victim segment */
+#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
+
+struct f2fs_gc_kthread {
+ struct task_struct *f2fs_gc_task;
+ wait_queue_head_t gc_wait_queue_head;
+
+ /* for gc sleep time */
+ unsigned int min_sleep_time;
+ unsigned int max_sleep_time;
+ unsigned int no_gc_sleep_time;
+
+ /* for changing gc mode */
+ unsigned int gc_idle;
+};
+
+struct gc_inode_list {
+ struct list_head ilist;
+ struct radix_tree_root iroot;
+};
+
+/*
+ * inline functions
+ */
+static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
+{
+ if (free_segments(sbi) < overprovision_segments(sbi))
+ return 0;
+ else
+ return (free_segments(sbi) - overprovision_segments(sbi))
+ << sbi->log_blocks_per_seg;
+}
+
+static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
+{
+ return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
+}
+
+static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
+{
+ block_t reclaimable_user_blocks = sbi->user_block_count -
+ written_block_count(sbi);
+ return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
+}
+
+static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
+ long *wait)
+{
+ if (*wait == gc_th->no_gc_sleep_time)
+ return;
+
+ *wait += gc_th->min_sleep_time;
+ if (*wait > gc_th->max_sleep_time)
+ *wait = gc_th->max_sleep_time;
+}
+
+static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
+ long *wait)
+{
+ if (*wait == gc_th->no_gc_sleep_time)
+ *wait = gc_th->max_sleep_time;
+
+ *wait -= gc_th->min_sleep_time;
+ if (*wait <= gc_th->min_sleep_time)
+ *wait = gc_th->min_sleep_time;
+}
+
+static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
+{
+ block_t invalid_user_blocks = sbi->user_block_count -
+ written_block_count(sbi);
+ /*
+ * Background GC is triggered with the following conditions.
+ * 1. There are a number of invalid blocks.
+ * 2. There is not enough free space.
+ */
+ if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
+ free_user_blocks(sbi) < limit_free_user_blocks(sbi))
+ return true;
+ return false;
+}
+
+static inline int is_idle(struct f2fs_sb_info *sbi)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct request_list *rl = &q->rq;
+ return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
+}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
new file mode 100644
index 0000000..a844fcf
--- /dev/null
+++ b/fs/f2fs/hash.c
@@ -0,0 +1,104 @@
+/*
+ * fs/f2fs/hash.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext3/hash.c
+ *
+ * Copyright (C) 2002 by Theodore Ts'o
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/cryptohash.h>
+#include <linux/pagemap.h>
+
+#include "f2fs.h"
+
+/*
+ * Hashing code copied from ext3
+ */
+#define DELTA 0x9E3779B9
+
+static void TEA_transform(unsigned int buf[4], unsigned int const in[])
+{
+ __u32 sum = 0;
+ __u32 b0 = buf[0], b1 = buf[1];
+ __u32 a = in[0], b = in[1], c = in[2], d = in[3];
+ int n = 16;
+
+ do {
+ sum += DELTA;
+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
+ } while (--n);
+
+ buf[0] += b0;
+ buf[1] += b1;
+}
+
+static void str2hashbuf(const unsigned char *msg, size_t len,
+ unsigned int *buf, int num)
+{
+ unsigned pad, val;
+ int i;
+
+ pad = (__u32)len | ((__u32)len << 8);
+ pad |= pad << 16;
+
+ val = pad;
+ if (len > num * 4)
+ len = num * 4;
+ for (i = 0; i < len; i++) {
+ if ((i % 4) == 0)
+ val = pad;
+ val = msg[i] + (val << 8);
+ if ((i % 4) == 3) {
+ *buf++ = val;
+ val = pad;
+ num--;
+ }
+ }
+ if (--num >= 0)
+ *buf++ = val;
+ while (--num >= 0)
+ *buf++ = pad;
+}
+
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
+{
+ __u32 hash;
+ f2fs_hash_t f2fs_hash;
+ const unsigned char *p;
+ __u32 in[8], buf[4];
+ const unsigned char *name = name_info->name;
+ size_t len = name_info->len;
+
+ if ((len <= 2) && (name[0] == '.') &&
+ (name[1] == '.' || name[1] == '\0'))
+ return 0;
+
+ /* Initialize the default seed for the hash checksum functions */
+ buf[0] = 0x67452301;
+ buf[1] = 0xefcdab89;
+ buf[2] = 0x98badcfe;
+ buf[3] = 0x10325476;
+
+ p = name;
+ while (1) {
+ str2hashbuf(p, len, in, 4);
+ TEA_transform(buf, in);
+ p += 16;
+ if (len <= 16)
+ break;
+ len -= 16;
+ }
+ hash = buf[0];
+ f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
+ return f2fs_hash;
+}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
new file mode 100644
index 0000000..42da76b
--- /dev/null
+++ b/fs/f2fs/inline.c
@@ -0,0 +1,527 @@
+/*
+ * fs/f2fs/inline.c
+ * Copyright (c) 2013, Intel Corporation
+ * Authors: Huajun Li <huajun.li@intel.com>
+ * Haicheng Li <haicheng.li@intel.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+
+bool f2fs_may_inline(struct inode *inode)
+{
+ if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
+ return false;
+
+ if (f2fs_is_atomic_file(inode))
+ return false;
+
+ if (!S_ISREG(inode->i_mode))
+ return false;
+
+ if (i_size_read(inode) > MAX_INLINE_DATA)
+ return false;
+
+ return true;
+}
+
+void read_inline_data(struct page *page, struct page *ipage)
+{
+ void *src_addr, *dst_addr;
+
+ if (PageUptodate(page))
+ return;
+
+ f2fs_bug_on(F2FS_P_SB(page), page->index);
+
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap_atomic(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ flush_dcache_page(page);
+ kunmap_atomic(dst_addr);
+ SetPageUptodate(page);
+}
+
+static void truncate_inline_data(struct page *ipage)
+{
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA);
+}
+
+int f2fs_read_inline_data(struct inode *inode, struct page *page)
+{
+ struct page *ipage;
+
+ ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ if (IS_ERR(ipage)) {
+ unlock_page(page);
+ return PTR_ERR(ipage);
+ }
+
+ if (!f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ return -EAGAIN;
+ }
+
+ if (page->index)
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ else
+ read_inline_data(page, ipage);
+
+ SetPageUptodate(page);
+ f2fs_put_page(ipage, 1);
+ unlock_page(page);
+ return 0;
+}
+
+int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
+{
+ void *src_addr, *dst_addr;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC | REQ_PRIO,
+ };
+ int dirty, err;
+
+ f2fs_bug_on(F2FS_I_SB(dn->inode), page->index);
+
+ if (!f2fs_exist_data(dn->inode))
+ goto clear_out;
+
+ err = f2fs_reserve_block(dn, 0);
+ if (err)
+ return err;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+
+ if (PageUptodate(page))
+ goto no_update;
+
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(dn->inode_page);
+ dst_addr = kmap_atomic(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ flush_dcache_page(page);
+ kunmap_atomic(dst_addr);
+ SetPageUptodate(page);
+no_update:
+ /* clear dirty state */
+ dirty = clear_page_dirty_for_io(page);
+
+ /* write data page to try to make data consistent */
+ set_page_writeback(page);
+ fio.blk_addr = dn->data_blkaddr;
+ write_data_page(page, dn, &fio);
+ update_extent_cache(dn);
+ f2fs_wait_on_page_writeback(page, DATA);
+ if (dirty)
+ inode_dec_dirty_pages(dn->inode);
+
+ /* this converted inline_data should be recovered. */
+ set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
+
+ /* clear inline data and flag after data writeback */
+ truncate_inline_data(dn->inode_page);
+clear_out:
+ stat_dec_inline_inode(dn->inode);
+ f2fs_clear_inline_inode(dn->inode);
+ sync_inode_page(dn);
+ f2fs_put_dnode(dn);
+ return 0;
+}
+
+int f2fs_convert_inline_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct dnode_of_data dn;
+ struct page *ipage, *page;
+ int err = 0;
+
+ page = grab_cache_page(inode->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ f2fs_lock_op(sbi);
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode))
+ err = f2fs_convert_inline_page(&dn, page);
+
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_unlock_op(sbi);
+
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+int f2fs_write_inline_data(struct inode *inode, struct page *page)
+{
+ void *src_addr, *dst_addr;
+ struct dnode_of_data dn;
+ int err;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
+ if (err)
+ return err;
+
+ if (!f2fs_has_inline_data(inode)) {
+ f2fs_put_dnode(&dn);
+ return -EAGAIN;
+ }
+
+ f2fs_bug_on(F2FS_I_SB(inode), page->index);
+
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+ src_addr = kmap_atomic(page);
+ dst_addr = inline_data_addr(dn.inode_page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap_atomic(src_addr);
+
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
+ sync_inode_page(&dn);
+ f2fs_put_dnode(&dn);
+ return 0;
+}
+
+bool recover_inline_data(struct inode *inode, struct page *npage)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode *ri = NULL;
+ void *src_addr, *dst_addr;
+ struct page *ipage;
+
+ /*
+ * The inline_data recovery policy is as follows.
+ * [prev.] [next] of inline_data flag
+ * o o -> recover inline_data
+ * o x -> remove inline_data, and then recover data blocks
+ * x o -> remove inline_data, and then recover inline_data
+ * x x -> recover data blocks
+ */
+ if (IS_INODE(npage))
+ ri = F2FS_INODE(npage);
+
+ if (f2fs_has_inline_data(inode) &&
+ ri && (ri->i_inline & F2FS_INLINE_DATA)) {
+process_inline:
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(sbi, IS_ERR(ipage));
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ src_addr = inline_data_addr(npage);
+ dst_addr = inline_data_addr(ipage);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+ return true;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(sbi, IS_ERR(ipage));
+ truncate_inline_data(ipage);
+ f2fs_clear_inline_inode(inode);
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+ } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
+ truncate_blocks(inode, 0, false);
+ goto process_inline;
+ }
+ return false;
+}
+
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+ struct qstr *name, struct page **res_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct f2fs_inline_dentry *inline_dentry;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_ptr d;
+ struct page *ipage;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return NULL;
+
+ inline_dentry = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)inline_dentry, 2);
+ de = find_target_dentry(name, NULL, &d);
+
+ unlock_page(ipage);
+ if (de)
+ *res_page = ipage;
+ else
+ f2fs_put_page(ipage, 0);
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs has occurred.
+ */
+ f2fs_bug_on(sbi, d.max < 0);
+ return de;
+}
+
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir,
+ struct page **p)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ struct f2fs_dir_entry *de;
+ struct f2fs_inline_dentry *dentry_blk;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return NULL;
+
+ dentry_blk = inline_data_addr(ipage);
+ de = &dentry_blk->dentry[1];
+ *p = ipage;
+ unlock_page(ipage);
+ return de;
+}
+
+int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+ struct page *ipage)
+{
+ struct f2fs_inline_dentry *dentry_blk;
+ struct f2fs_dentry_ptr d;
+
+ dentry_blk = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 2);
+ do_make_empty_dir(inode, parent, &d);
+
+ set_page_dirty(ipage);
+
+ /* update i_size to MAX_INLINE_DATA */
+ if (i_size_read(inode) < MAX_INLINE_DATA) {
+ i_size_write(inode, MAX_INLINE_DATA);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
+ }
+ return 0;
+}
+
+static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+ struct f2fs_inline_dentry *inline_dentry)
+{
+ struct page *page;
+ struct dnode_of_data dn;
+ struct f2fs_dentry_block *dentry_blk;
+ int err;
+
+ page = grab_cache_page(dir->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ set_new_dnode(&dn, dir, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, 0);
+ if (err)
+ goto out;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+
+ dentry_blk = kmap_atomic(page);
+
+ /* copy data from inline dentry block to new dentry block */
+ memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
+ INLINE_DENTRY_BITMAP_SIZE);
+ memcpy(dentry_blk->dentry, inline_dentry->dentry,
+ sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
+ memcpy(dentry_blk->filename, inline_dentry->filename,
+ NR_INLINE_DENTRY * F2FS_SLOT_LEN);
+
+ kunmap_atomic(dentry_blk);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /* clear inline dir and flag after data writeback */
+ truncate_inline_data(ipage);
+
+ stat_dec_inline_dir(dir);
+ clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
+
+ if (i_size_read(dir) < PAGE_CACHE_SIZE) {
+ i_size_write(dir, PAGE_CACHE_SIZE);
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+
+ sync_inode_page(&dn);
+out:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
+ struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ unsigned int bit_pos;
+ f2fs_hash_t name_hash;
+ struct f2fs_dir_entry *de;
+ size_t namelen = name->len;
+ struct f2fs_inline_dentry *dentry_blk = NULL;
+ int slots = GET_DENTRY_SLOTS(namelen);
+ struct page *page;
+ int err = 0;
+ int i;
+
+ name_hash = f2fs_dentry_hash(name);
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ dentry_blk = inline_data_addr(ipage);
+ bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ slots, NR_INLINE_DENTRY);
+ if (bit_pos >= NR_INLINE_DENTRY) {
+ err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+ if (!err)
+ err = -EAGAIN;
+ goto out;
+ }
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name, ipage);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ de = &dentry_blk->dentry[bit_pos];
+ de->hash_code = name_hash;
+ de->name_len = cpu_to_le16(namelen);
+ memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
+ de->ino = cpu_to_le32(inode->i_ino);
+ set_de_type(de, inode);
+ for (i = 0; i < slots; i++)
+ test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ set_page_dirty(ipage);
+
+ /* we don't need to mark_inode_dirty now */
+ F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ update_parent_metadata(dir, inode, 0);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode(dir, ipage);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+out:
+ f2fs_put_page(ipage, 1);
+ return err;
+}
+
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode)
+{
+ struct f2fs_inline_dentry *inline_dentry;
+ int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+ unsigned int bit_pos;
+ int i;
+
+ lock_page(page);
+ f2fs_wait_on_page_writeback(page, NODE);
+
+ inline_dentry = inline_data_addr(page);
+ bit_pos = dentry - inline_dentry->dentry;
+ for (i = 0; i < slots; i++)
+ test_and_clear_bit_le(bit_pos + i,
+ &inline_dentry->dentry_bitmap);
+
+ set_page_dirty(page);
+
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+ if (inode)
+ f2fs_drop_nlink(dir, inode, page);
+
+ f2fs_put_page(page, 1);
+}
+
+bool f2fs_empty_inline_dir(struct inode *dir)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ unsigned int bit_pos = 2;
+ struct f2fs_inline_dentry *dentry_blk;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return false;
+
+ dentry_blk = inline_data_addr(ipage);
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+ NR_INLINE_DENTRY,
+ bit_pos);
+
+ f2fs_put_page(ipage, 1);
+
+ if (bit_pos < NR_INLINE_DENTRY)
+ return false;
+
+ return true;
+}
+
+int f2fs_read_inline_dir(struct file *file, void *dirent, filldir_t filldir)
+{
+ unsigned long pos = file->f_pos;
+ unsigned int bit_pos = 0;
+ struct inode *inode = file_inode(file);
+ struct f2fs_inline_dentry *inline_dentry = NULL;
+ struct page *ipage = NULL;
+ struct f2fs_dentry_ptr d;
+
+ if (pos >= NR_INLINE_DENTRY)
+ return 0;
+
+ bit_pos = (pos % NR_INLINE_DENTRY);
+
+ ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ inline_dentry = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)inline_dentry, 2);
+
+ if (!f2fs_fill_dentries(file, dirent, filldir, &d, 0, bit_pos))
+ file->f_pos = NR_INLINE_DENTRY;
+
+ f2fs_put_page(ipage, 1);
+ return 0;
+}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
new file mode 100644
index 0000000..8a75669
--- /dev/null
+++ b/fs/f2fs/inode.c
@@ -0,0 +1,361 @@
+/*
+ * fs/f2fs/inode.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+
+#include "f2fs.h"
+#include "node.h"
+
+#include <trace/events/f2fs.h>
+
+void f2fs_set_inode_flags(struct inode *inode)
+{
+ unsigned int flags = F2FS_I(inode)->i_flags;
+
+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
+ S_NOATIME | S_DIRSYNC);
+
+ if (flags & FS_SYNC_FL)
+ inode->i_flags |= S_SYNC;
+ if (flags & FS_APPEND_FL)
+ inode->i_flags |= S_APPEND;
+ if (flags & FS_IMMUTABLE_FL)
+ inode->i_flags |= S_IMMUTABLE;
+ if (flags & FS_NOATIME_FL)
+ inode->i_flags |= S_NOATIME;
+ if (flags & FS_DIRSYNC_FL)
+ inode->i_flags |= S_DIRSYNC;
+}
+
+static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ if (ri->i_addr[0])
+ inode->i_rdev =
+ old_decode_dev(le32_to_cpu(ri->i_addr[0]));
+ else
+ inode->i_rdev =
+ new_decode_dev(le32_to_cpu(ri->i_addr[1]));
+ }
+}
+
+static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
+{
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ if (old_valid_dev(inode->i_rdev)) {
+ ri->i_addr[0] =
+ cpu_to_le32(old_encode_dev(inode->i_rdev));
+ ri->i_addr[1] = 0;
+ } else {
+ ri->i_addr[0] = 0;
+ ri->i_addr[1] =
+ cpu_to_le32(new_encode_dev(inode->i_rdev));
+ ri->i_addr[2] = 0;
+ }
+ }
+}
+
+static void __recover_inline_status(struct inode *inode, struct page *ipage)
+{
+ void *inline_data = inline_data_addr(ipage);
+ __le32 *start = inline_data;
+ __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32);
+
+ while (start < end) {
+ if (*start++) {
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+ set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
+ set_page_dirty(ipage);
+ return;
+ }
+ }
+ return;
+}
+
+static int do_read_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct page *node_page;
+ struct f2fs_inode *ri;
+
+ /* Check if ino is within scope */
+ if (check_nid_range(sbi, inode->i_ino)) {
+ f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
+ (unsigned long) inode->i_ino);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ node_page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(node_page))
+ return PTR_ERR(node_page);
+
+ ri = F2FS_INODE(node_page);
+
+ inode->i_mode = le16_to_cpu(ri->i_mode);
+ inode->i_uid = le32_to_cpu(ri->i_uid);
+ inode->i_gid = le32_to_cpu(ri->i_gid);
+ set_nlink(inode, le32_to_cpu(ri->i_links));
+ inode->i_size = le64_to_cpu(ri->i_size);
+ inode->i_blocks = le64_to_cpu(ri->i_blocks);
+
+ inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
+ inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
+ inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
+ inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
+ inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
+ inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
+ inode->i_generation = le32_to_cpu(ri->i_generation);
+
+ fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
+ fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
+ fi->i_flags = le32_to_cpu(ri->i_flags);
+ fi->flags = 0;
+ fi->i_advise = ri->i_advise;
+ fi->i_pino = le32_to_cpu(ri->i_pino);
+ fi->i_dir_level = ri->i_dir_level;
+
+ get_extent_info(&fi->ext, ri->i_ext);
+ get_inline_info(fi, ri);
+
+ /* check data exist */
+ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
+ __recover_inline_status(inode, node_page);
+
+ /* get rdev by using inline_info */
+ __get_inode_rdev(inode, ri);
+
+ f2fs_put_page(node_page, 1);
+
+ stat_inc_inline_inode(inode);
+ stat_inc_inline_dir(inode);
+
+ return 0;
+}
+
+struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct inode *inode;
+ int ret = 0;
+
+ inode = iget_locked(sb, ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ if (!(inode->i_state & I_NEW)) {
+ trace_f2fs_iget(inode);
+ return inode;
+ }
+ if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
+ goto make_now;
+
+ ret = do_read_inode(inode);
+ if (ret)
+ goto bad_inode;
+make_now:
+ if (ino == F2FS_NODE_INO(sbi)) {
+ inode->i_mapping->a_ops = &f2fs_node_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ } else if (ino == F2FS_META_INO(sbi)) {
+ inode->i_mapping->a_ops = &f2fs_meta_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ } else if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &f2fs_file_inode_operations;
+ inode->i_fop = &f2fs_file_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &f2fs_dir_inode_operations;
+ inode->i_fop = &f2fs_dir_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
+ } else if (S_ISLNK(inode->i_mode)) {
+ inode->i_op = &f2fs_symlink_inode_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ inode->i_op = &f2fs_special_inode_operations;
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ } else {
+ ret = -EIO;
+ goto bad_inode;
+ }
+ unlock_new_inode(inode);
+ trace_f2fs_iget(inode);
+ return inode;
+
+bad_inode:
+ iget_failed(inode);
+ trace_f2fs_iget_exit(inode, ret);
+ return ERR_PTR(ret);
+}
+
+void update_inode(struct inode *inode, struct page *node_page)
+{
+ struct f2fs_inode *ri;
+
+ f2fs_wait_on_page_writeback(node_page, NODE);
+
+ ri = F2FS_INODE(node_page);
+
+ ri->i_mode = cpu_to_le16(inode->i_mode);
+ ri->i_advise = F2FS_I(inode)->i_advise;
+ ri->i_uid = cpu_to_le32(inode->i_uid);
+ ri->i_gid = cpu_to_le32(inode->i_gid);
+ ri->i_links = cpu_to_le32(inode->i_nlink);
+ ri->i_size = cpu_to_le64(i_size_read(inode));
+ ri->i_blocks = cpu_to_le64(inode->i_blocks);
+ set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
+ set_raw_inline(F2FS_I(inode), ri);
+
+ ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
+ ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+ ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
+ ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+ ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
+ ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
+ ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
+ ri->i_generation = cpu_to_le32(inode->i_generation);
+ ri->i_dir_level = F2FS_I(inode)->i_dir_level;
+
+ __set_inode_rdev(inode, ri);
+ set_cold_node(inode, node_page);
+ set_page_dirty(node_page);
+
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+}
+
+void update_inode_page(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct page *node_page;
+retry:
+ node_page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(node_page)) {
+ int err = PTR_ERR(node_page);
+ if (err == -ENOMEM) {
+ cond_resched();
+ goto retry;
+ } else if (err != -ENOENT) {
+ f2fs_stop_checkpoint(sbi);
+ }
+ return;
+ }
+ update_inode(inode, node_page);
+ f2fs_put_page(node_page, 1);
+}
+
+int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino == F2FS_NODE_INO(sbi) ||
+ inode->i_ino == F2FS_META_INO(sbi))
+ return 0;
+
+ if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE))
+ return 0;
+
+ /*
+ * We need to lock here to prevent from producing dirty node pages
+ * during the urgent cleaning time when runing out of free sections.
+ */
+ f2fs_lock_op(sbi);
+ update_inode_page(inode);
+ f2fs_unlock_op(sbi);
+
+ if (wbc)
+ f2fs_balance_fs(sbi);
+
+ return 0;
+}
+
+/*
+ * Called at the last iput() if i_nlink is zero
+ */
+void f2fs_evict_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+
+ /* some remained atomic pages should discarded */
+ if (f2fs_is_atomic_file(inode))
+ commit_inmem_pages(inode, true);
+
+ trace_f2fs_evict_inode(inode);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (inode->i_ino == F2FS_NODE_INO(sbi) ||
+ inode->i_ino == F2FS_META_INO(sbi))
+ goto out_clear;
+
+ f2fs_bug_on(sbi, get_dirty_pages(inode));
+ remove_dirty_dir_inode(inode);
+
+ if (inode->i_nlink || is_bad_inode(inode))
+ goto no_delete;
+
+ set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
+ i_size_write(inode, 0);
+
+ if (F2FS_HAS_BLOCKS(inode))
+ f2fs_truncate(inode);
+
+ f2fs_lock_op(sbi);
+ remove_inode_page(inode);
+ f2fs_unlock_op(sbi);
+
+no_delete:
+ stat_dec_inline_dir(inode);
+ stat_dec_inline_inode(inode);
+ invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
+ if (xnid)
+ invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
+ if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
+out_clear:
+ end_writeback(inode);
+}
+
+/* caller should call f2fs_lock_op() */
+void handle_failed_inode(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+ clear_nlink(inode);
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+
+ i_size_write(inode, 0);
+ if (F2FS_HAS_BLOCKS(inode))
+ f2fs_truncate(inode);
+
+ remove_inode_page(inode);
+
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
+ alloc_nid_failed(sbi, inode->i_ino);
+ f2fs_unlock_op(sbi);
+
+ /* iput will drop the inode object */
+ iput(inode);
+}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
new file mode 100644
index 0000000..63094b3
--- /dev/null
+++ b/fs/f2fs/namei.c
@@ -0,0 +1,545 @@
+/*
+ * fs/f2fs/namei.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/dcache.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "xattr.h"
+#include "acl.h"
+#include <trace/events/f2fs.h>
+
+static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ nid_t ino;
+ struct inode *inode;
+ bool nid_free = false;
+ int err;
+
+ inode = new_inode(dir->i_sb);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ f2fs_lock_op(sbi);
+ if (!alloc_nid(sbi, &ino)) {
+ f2fs_unlock_op(sbi);
+ err = -ENOSPC;
+ goto fail;
+ }
+ f2fs_unlock_op(sbi);
+
+ inode_init_owner(inode, dir, mode);
+
+ inode->i_ino = ino;
+ inode->i_blocks = 0;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_generation = sbi->s_next_generation++;
+
+ err = insert_inode_locked(inode);
+ if (err) {
+ err = -EINVAL;
+ nid_free = true;
+ goto out;
+ }
+
+ if (f2fs_may_inline(inode))
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode))
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
+
+ trace_f2fs_new_inode(inode, 0);
+ mark_inode_dirty(inode);
+ return inode;
+
+out:
+ clear_nlink(inode);
+ unlock_new_inode(inode);
+fail:
+ trace_f2fs_new_inode(inode, err);
+ make_bad_inode(inode);
+ iput(inode);
+ if (nid_free)
+ alloc_nid_failed(sbi, ino);
+ return ERR_PTR(err);
+}
+
+static int is_multimedia_file(const unsigned char *s, const char *sub)
+{
+ size_t slen = strlen(s);
+ size_t sublen = strlen(sub);
+
+ if (sublen > slen)
+ return 0;
+
+ return !strncasecmp(s + slen - sublen, sub, sublen);
+}
+
+/*
+ * Set multimedia files as cold files for hot/cold data separation
+ */
+static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
+ const unsigned char *name)
+{
+ int i;
+ __u8 (*extlist)[8] = sbi->raw_super->extension_list;
+
+ int count = le32_to_cpu(sbi->raw_super->extension_count);
+ for (i = 0; i < count; i++) {
+ if (is_multimedia_file(name, extlist[i])) {
+ file_set_cold(inode);
+ break;
+ }
+ }
+}
+
+static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+ struct nameidata *nd)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct inode *inode;
+ nid_t ino = 0;
+ int err;
+
+ f2fs_balance_fs(sbi);
+
+ inode = f2fs_new_inode(dir, mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
+ set_cold_files(sbi, inode, dentry->d_name.name);
+
+ inode->i_op = &f2fs_file_inode_operations;
+ inode->i_fop = &f2fs_file_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ ino = inode->i_ino;
+
+ f2fs_lock_op(sbi);
+ err = f2fs_add_link(dentry, inode);
+ if (err)
+ goto out;
+ f2fs_unlock_op(sbi);
+
+ alloc_nid_done(sbi, ino);
+
+ stat_inc_inline_inode(inode);
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
+ return 0;
+out:
+ handle_failed_inode(inode);
+ return err;
+}
+
+static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ struct inode *inode = old_dentry->d_inode;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ int err;
+
+ f2fs_balance_fs(sbi);
+
+ inode->i_ctime = CURRENT_TIME;
+ ihold(inode);
+
+ set_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ f2fs_lock_op(sbi);
+ err = f2fs_add_link(dentry, inode);
+ if (err)
+ goto out;
+ f2fs_unlock_op(sbi);
+
+ d_instantiate(dentry, inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
+ return 0;
+out:
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ iput(inode);
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
+struct dentry *f2fs_get_parent(struct dentry *child)
+{
+ struct qstr dotdot = {.len = 2, .name = ".."};
+ unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
+ if (!ino)
+ return ERR_PTR(-ENOENT);
+ return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
+}
+
+static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct inode *inode = NULL;
+ struct f2fs_dir_entry *de;
+ struct page *page;
+
+ if (dentry->d_name.len > F2FS_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ de = f2fs_find_entry(dir, &dentry->d_name, &page);
+ if (de) {
+ nid_t ino = le32_to_cpu(de->ino);
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+
+ inode = f2fs_iget(dir->i_sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ }
+
+ return d_splice_alias(inode, dentry);
+}
+
+static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct inode *inode = dentry->d_inode;
+ struct f2fs_dir_entry *de;
+ struct page *page;
+ int err = -ENOENT;
+
+ trace_f2fs_unlink_enter(dir, dentry);
+ f2fs_balance_fs(sbi);
+
+ de = f2fs_find_entry(dir, &dentry->d_name, &page);
+ if (!de)
+ goto fail;
+
+ f2fs_lock_op(sbi);
+ err = acquire_orphan_inode(sbi);
+ if (err) {
+ f2fs_unlock_op(sbi);
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+ goto fail;
+ }
+ f2fs_delete_entry(de, page, dir, inode);
+ f2fs_unlock_op(sbi);
+
+ /* In order to evict this inode, we set it dirty */
+ mark_inode_dirty(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
+fail:
+ trace_f2fs_unlink_exit(inode, err);
+ return err;
+}
+
+static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct inode *inode;
+ size_t symlen = strlen(symname) + 1;
+ int err;
+
+ f2fs_balance_fs(sbi);
+
+ inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = &f2fs_symlink_inode_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+
+ f2fs_lock_op(sbi);
+ err = f2fs_add_link(dentry, inode);
+ if (err)
+ goto out;
+ f2fs_unlock_op(sbi);
+
+ err = page_symlink(inode, symname, symlen);
+ alloc_nid_done(sbi, inode->i_ino);
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
+ return err;
+out:
+ handle_failed_inode(inode);
+ return err;
+}
+
+static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct inode *inode;
+ int err;
+
+ f2fs_balance_fs(sbi);
+
+ inode = f2fs_new_inode(dir, S_IFDIR | mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = &f2fs_dir_inode_operations;
+ inode->i_fop = &f2fs_dir_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
+
+ set_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ f2fs_lock_op(sbi);
+ err = f2fs_add_link(dentry, inode);
+ if (err)
+ goto out_fail;
+ f2fs_unlock_op(sbi);
+
+ stat_inc_inline_dir(inode);
+ alloc_nid_done(sbi, inode->i_ino);
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
+ return 0;
+
+out_fail:
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ handle_failed_inode(inode);
+ return err;
+}
+
+static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ if (f2fs_empty_dir(inode))
+ return f2fs_unlink(dir, dentry);
+ return -ENOTEMPTY;
+}
+
+static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
+ umode_t mode, dev_t rdev)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct inode *inode;
+ int err = 0;
+
+ if (!new_valid_dev(rdev))
+ return -EINVAL;
+
+ f2fs_balance_fs(sbi);
+
+ inode = f2fs_new_inode(dir, mode);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ init_special_inode(inode, inode->i_mode, rdev);
+ inode->i_op = &f2fs_special_inode_operations;
+
+ f2fs_lock_op(sbi);
+ err = f2fs_add_link(dentry, inode);
+ if (err)
+ goto out;
+ f2fs_unlock_op(sbi);
+
+ alloc_nid_done(sbi, inode->i_ino);
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
+ return 0;
+out:
+ handle_failed_inode(inode);
+ return err;
+}
+
+static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
+ struct inode *old_inode = old_dentry->d_inode;
+ struct inode *new_inode = new_dentry->d_inode;
+ struct page *old_dir_page;
+ struct page *old_page, *new_page;
+ struct f2fs_dir_entry *old_dir_entry = NULL;
+ struct f2fs_dir_entry *old_entry;
+ struct f2fs_dir_entry *new_entry;
+ int err = -ENOENT;
+
+ f2fs_balance_fs(sbi);
+
+ old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+ if (!old_entry)
+ goto out;
+
+ if (S_ISDIR(old_inode->i_mode)) {
+ err = -EIO;
+ old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
+ if (!old_dir_entry)
+ goto out_old;
+ }
+
+ if (new_inode) {
+
+ err = -ENOTEMPTY;
+ if (old_dir_entry && !f2fs_empty_dir(new_inode))
+ goto out_dir;
+
+ err = -ENOENT;
+ new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
+ &new_page);
+ if (!new_entry)
+ goto out_dir;
+
+ f2fs_lock_op(sbi);
+
+ err = acquire_orphan_inode(sbi);
+ if (err)
+ goto put_out_dir;
+
+ if (update_dent_inode(old_inode, &new_dentry->d_name)) {
+ release_orphan_inode(sbi);
+ goto put_out_dir;
+ }
+
+ f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+
+ new_inode->i_ctime = CURRENT_TIME;
+ down_write(&F2FS_I(new_inode)->i_sem);
+ if (old_dir_entry)
+ drop_nlink(new_inode);
+ drop_nlink(new_inode);
+ up_write(&F2FS_I(new_inode)->i_sem);
+
+ mark_inode_dirty(new_inode);
+
+ if (!new_inode->i_nlink)
+ add_orphan_inode(sbi, new_inode->i_ino);
+ else
+ release_orphan_inode(sbi);
+
+ update_inode_page(old_inode);
+ update_inode_page(new_inode);
+ } else {
+ f2fs_lock_op(sbi);
+
+ err = f2fs_add_link(new_dentry, old_inode);
+ if (err) {
+ f2fs_unlock_op(sbi);
+ goto out_dir;
+ }
+
+ if (old_dir_entry) {
+ inc_nlink(new_dir);
+ update_inode_page(new_dir);
+ }
+ }
+
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
+ old_inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(old_inode);
+
+ f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
+
+ if (old_dir_entry) {
+ if (old_dir != new_dir) {
+ f2fs_set_link(old_inode, old_dir_entry,
+ old_dir_page, new_dir);
+ update_inode_page(old_inode);
+ } else {
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
+ f2fs_put_page(old_dir_page, 0);
+ }
+ drop_nlink(old_dir);
+ mark_inode_dirty(old_dir);
+ update_inode_page(old_dir);
+ }
+
+ f2fs_unlock_op(sbi);
+
+ if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+ f2fs_sync_fs(sbi->sb, 1);
+ return 0;
+
+put_out_dir:
+ f2fs_unlock_op(sbi);
+ f2fs_dentry_kunmap(new_dir, new_page);
+ f2fs_put_page(new_page, 0);
+out_dir:
+ if (old_dir_entry) {
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
+ f2fs_put_page(old_dir_page, 0);
+ }
+out_old:
+ f2fs_dentry_kunmap(old_dir, old_page);
+ f2fs_put_page(old_page, 0);
+out:
+ return err;
+}
+
+const struct inode_operations f2fs_dir_inode_operations = {
+ .create = f2fs_create,
+ .lookup = f2fs_lookup,
+ .link = f2fs_link,
+ .unlink = f2fs_unlink,
+ .symlink = f2fs_symlink,
+ .mkdir = f2fs_mkdir,
+ .rmdir = f2fs_rmdir,
+ .mknod = f2fs_mknod,
+ .rename = f2fs_rename,
+ .getattr = f2fs_getattr,
+ .setattr = f2fs_setattr,
+ .get_acl = f2fs_get_acl,
+#ifdef CONFIG_F2FS_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = f2fs_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+};
+
+const struct inode_operations f2fs_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = page_follow_link_light,
+ .put_link = page_put_link,
+ .getattr = f2fs_getattr,
+ .setattr = f2fs_setattr,
+#ifdef CONFIG_F2FS_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = f2fs_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+};
+
+const struct inode_operations f2fs_special_inode_operations = {
+ .getattr = f2fs_getattr,
+ .setattr = f2fs_setattr,
+ .get_acl = f2fs_get_acl,
+#ifdef CONFIG_F2FS_FS_XATTR
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = f2fs_listxattr,
+ .removexattr = generic_removexattr,
+#endif
+};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
new file mode 100644
index 0000000..23c8fb5
--- /dev/null
+++ b/fs/f2fs/node.c
@@ -0,0 +1,2072 @@
+/*
+ * fs/f2fs/node.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/mpage.h>
+#include <linux/backing-dev.h>
+#include <linux/blkdev.h>
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "trace.h"
+#include <trace/events/f2fs.h>
+
+#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
+
+static struct kmem_cache *nat_entry_slab;
+static struct kmem_cache *free_nid_slab;
+static struct kmem_cache *nat_entry_set_slab;
+
+bool available_free_memory(struct f2fs_sb_info *sbi, int type)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct sysinfo val;
+ unsigned long avail_ram;
+ unsigned long mem_size = 0;
+ bool res = false;
+
+ si_meminfo(&val);
+
+ /* only uses low memory */
+ avail_ram = val.totalram - val.totalhigh;
+
+ /* give 25%, 25%, 50%, 50% memory for each components respectively */
+ if (type == FREE_NIDS) {
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == NAT_ENTRIES) {
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == DIRTY_DENTS) {
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == INO_ENTRIES) {
+ int i;
+
+ for (i = 0; i <= UPDATE_INO; i++)
+ mem_size += (sbi->im[i].ino_num *
+ sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else {
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ }
+ return res;
+}
+
+static void clear_node_page_dirty(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ unsigned int long flags;
+
+ if (PageDirty(page)) {
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ radix_tree_tag_clear(&mapping->page_tree,
+ page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
+ clear_page_dirty_for_io(page);
+ dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
+ }
+ ClearPageUptodate(page);
+}
+
+static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ pgoff_t index = current_nat_addr(sbi, nid);
+ return get_meta_page(sbi, index);
+}
+
+static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct page *src_page;
+ struct page *dst_page;
+ pgoff_t src_off;
+ pgoff_t dst_off;
+ void *src_addr;
+ void *dst_addr;
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ src_off = current_nat_addr(sbi, nid);
+ dst_off = next_nat_addr(sbi, src_off);
+
+ /* get current nat block page with lock */
+ src_page = get_meta_page(sbi, src_off);
+ dst_page = grab_meta_page(sbi, dst_off);
+ f2fs_bug_on(sbi, PageDirty(src_page));
+
+ src_addr = page_address(src_page);
+ dst_addr = page_address(dst_page);
+ memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+ set_page_dirty(dst_page);
+ f2fs_put_page(src_page, 1);
+
+ set_to_next_nat(nm_i, nid);
+
+ return dst_page;
+}
+
+static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
+{
+ return radix_tree_lookup(&nm_i->nat_root, n);
+}
+
+static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
+ nid_t start, unsigned int nr, struct nat_entry **ep)
+{
+ return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
+}
+
+static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
+{
+ list_del(&e->list);
+ radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
+ nm_i->nat_cnt--;
+ kmem_cache_free(nat_entry_slab, e);
+}
+
+static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
+ struct nat_entry *ne)
+{
+ nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
+ struct nat_entry_set *head;
+
+ if (get_nat_flag(ne, IS_DIRTY))
+ return;
+
+ head = radix_tree_lookup(&nm_i->nat_set_root, set);
+ if (!head) {
+ head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+ INIT_LIST_HEAD(&head->entry_list);
+ INIT_LIST_HEAD(&head->set_list);
+ head->set = set;
+ head->entry_cnt = 0;
+ f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
+ }
+ list_move_tail(&ne->list, &head->entry_list);
+ nm_i->dirty_nat_cnt++;
+ head->entry_cnt++;
+ set_nat_flag(ne, IS_DIRTY, true);
+}
+
+static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
+ struct nat_entry *ne)
+{
+ nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
+ struct nat_entry_set *head;
+
+ head = radix_tree_lookup(&nm_i->nat_set_root, set);
+ if (head) {
+ list_move_tail(&ne->list, &nm_i->nat_entries);
+ set_nat_flag(ne, IS_DIRTY, false);
+ head->entry_cnt--;
+ nm_i->dirty_nat_cnt--;
+ }
+}
+
+static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
+ nid_t start, unsigned int nr, struct nat_entry_set **ep)
+{
+ return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
+ start, nr);
+}
+
+bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+ bool is_cp = true;
+
+ down_read(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e && !get_nat_flag(e, IS_CHECKPOINTED))
+ is_cp = false;
+ up_read(&nm_i->nat_tree_lock);
+ return is_cp;
+}
+
+bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+ bool fsynced = false;
+
+ down_read(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, ino);
+ if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
+ fsynced = true;
+ up_read(&nm_i->nat_tree_lock);
+ return fsynced;
+}
+
+bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+ bool need_update = true;
+
+ down_read(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, ino);
+ if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
+ (get_nat_flag(e, IS_CHECKPOINTED) ||
+ get_nat_flag(e, HAS_FSYNCED_INODE)))
+ need_update = false;
+ up_read(&nm_i->nat_tree_lock);
+ return need_update;
+}
+
+static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
+{
+ struct nat_entry *new;
+
+ new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
+ f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
+ memset(new, 0, sizeof(struct nat_entry));
+ nat_set_nid(new, nid);
+ nat_reset_flag(new);
+ list_add_tail(&new->list, &nm_i->nat_entries);
+ nm_i->nat_cnt++;
+ return new;
+}
+
+static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
+ struct f2fs_nat_entry *ne)
+{
+ struct nat_entry *e;
+
+ down_write(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (!e) {
+ e = grab_nat_entry(nm_i, nid);
+ node_info_from_raw_nat(&e->ni, ne);
+ }
+ up_write(&nm_i->nat_tree_lock);
+}
+
+static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
+ block_t new_blkaddr, bool fsync_done)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+
+ down_write(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, ni->nid);
+ if (!e) {
+ e = grab_nat_entry(nm_i, ni->nid);
+ copy_node_info(&e->ni, ni);
+ f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
+ } else if (new_blkaddr == NEW_ADDR) {
+ /*
+ * when nid is reallocated,
+ * previous nat entry can be remained in nat cache.
+ * So, reinitialize it with new information.
+ */
+ copy_node_info(&e->ni, ni);
+ f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
+ }
+
+ /* sanity check */
+ f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
+ f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
+ new_blkaddr == NULL_ADDR);
+ f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
+ new_blkaddr == NEW_ADDR);
+ f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
+ nat_get_blkaddr(e) != NULL_ADDR &&
+ new_blkaddr == NEW_ADDR);
+
+ /* increment version no as node is removed */
+ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
+ unsigned char version = nat_get_version(e);
+ nat_set_version(e, inc_node_version(version));
+ }
+
+ /* change address */
+ nat_set_blkaddr(e, new_blkaddr);
+ if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
+ set_nat_flag(e, IS_CHECKPOINTED, false);
+ __set_nat_cache_dirty(nm_i, e);
+
+ /* update fsync_mark if its inode nat entry is still alive */
+ e = __lookup_nat_cache(nm_i, ni->ino);
+ if (e) {
+ if (fsync_done && ni->nid == ni->ino)
+ set_nat_flag(e, HAS_FSYNCED_INODE, true);
+ set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
+ }
+ up_write(&nm_i->nat_tree_lock);
+}
+
+int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ if (available_free_memory(sbi, NAT_ENTRIES))
+ return 0;
+
+ down_write(&nm_i->nat_tree_lock);
+ while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+ struct nat_entry *ne;
+ ne = list_first_entry(&nm_i->nat_entries,
+ struct nat_entry, list);
+ __del_from_nat_cache(nm_i, ne);
+ nr_shrink--;
+ }
+ up_write(&nm_i->nat_tree_lock);
+ return nr_shrink;
+}
+
+/*
+ * This function always returns success
+ */
+void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ nid_t start_nid = START_NID(nid);
+ struct f2fs_nat_block *nat_blk;
+ struct page *page = NULL;
+ struct f2fs_nat_entry ne;
+ struct nat_entry *e;
+ int i;
+
+ ni->nid = nid;
+
+ /* Check nat cache */
+ down_read(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e) {
+ ni->ino = nat_get_ino(e);
+ ni->blk_addr = nat_get_blkaddr(e);
+ ni->version = nat_get_version(e);
+ }
+ up_read(&nm_i->nat_tree_lock);
+ if (e)
+ return;
+
+ memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+
+ /* Check current segment summary */
+ mutex_lock(&curseg->curseg_mutex);
+ i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
+ if (i >= 0) {
+ ne = nat_in_journal(sum, i);
+ node_info_from_raw_nat(ni, &ne);
+ }
+ mutex_unlock(&curseg->curseg_mutex);
+ if (i >= 0)
+ goto cache;
+
+ /* Fill node_info from nat page */
+ page = get_current_nat_page(sbi, start_nid);
+ nat_blk = (struct f2fs_nat_block *)page_address(page);
+ ne = nat_blk->entries[nid - start_nid];
+ node_info_from_raw_nat(ni, &ne);
+ f2fs_put_page(page, 1);
+cache:
+ /* cache nat entry */
+ cache_nat_entry(NM_I(sbi), nid, &ne);
+}
+
+/*
+ * The maximum depth is four.
+ * Offset[0] will have raw inode offset.
+ */
+static int get_node_path(struct f2fs_inode_info *fi, long block,
+ int offset[4], unsigned int noffset[4])
+{
+ const long direct_index = ADDRS_PER_INODE(fi);
+ const long direct_blks = ADDRS_PER_BLOCK;
+ const long dptrs_per_blk = NIDS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
+ int n = 0;
+ int level = 0;
+
+ noffset[0] = 0;
+
+ if (block < direct_index) {
+ offset[n] = block;
+ goto got;
+ }
+ block -= direct_index;
+ if (block < direct_blks) {
+ offset[n++] = NODE_DIR1_BLOCK;
+ noffset[n] = 1;
+ offset[n] = block;
+ level = 1;
+ goto got;
+ }
+ block -= direct_blks;
+ if (block < direct_blks) {
+ offset[n++] = NODE_DIR2_BLOCK;
+ noffset[n] = 2;
+ offset[n] = block;
+ level = 1;
+ goto got;
+ }
+ block -= direct_blks;
+ if (block < indirect_blks) {
+ offset[n++] = NODE_IND1_BLOCK;
+ noffset[n] = 3;
+ offset[n++] = block / direct_blks;
+ noffset[n] = 4 + offset[n - 1];
+ offset[n] = block % direct_blks;
+ level = 2;
+ goto got;
+ }
+ block -= indirect_blks;
+ if (block < indirect_blks) {
+ offset[n++] = NODE_IND2_BLOCK;
+ noffset[n] = 4 + dptrs_per_blk;
+ offset[n++] = block / direct_blks;
+ noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
+ offset[n] = block % direct_blks;
+ level = 2;
+ goto got;
+ }
+ block -= indirect_blks;
+ if (block < dindirect_blks) {
+ offset[n++] = NODE_DIND_BLOCK;
+ noffset[n] = 5 + (dptrs_per_blk * 2);
+ offset[n++] = block / indirect_blks;
+ noffset[n] = 6 + (dptrs_per_blk * 2) +
+ offset[n - 1] * (dptrs_per_blk + 1);
+ offset[n++] = (block / direct_blks) % dptrs_per_blk;
+ noffset[n] = 7 + (dptrs_per_blk * 2) +
+ offset[n - 2] * (dptrs_per_blk + 1) +
+ offset[n - 1];
+ offset[n] = block % direct_blks;
+ level = 3;
+ goto got;
+ } else {
+ BUG();
+ }
+got:
+ return level;
+}
+
+/*
+ * Caller should call f2fs_put_dnode(dn).
+ * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
+ * In the case of RDONLY_NODE, we don't need to care about mutex.
+ */
+int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct page *npage[4];
+ struct page *parent = NULL;
+ int offset[4];
+ unsigned int noffset[4];
+ nid_t nids[4];
+ int level, i;
+ int err = 0;
+
+ level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
+
+ nids[0] = dn->inode->i_ino;
+ npage[0] = dn->inode_page;
+
+ if (!npage[0]) {
+ npage[0] = get_node_page(sbi, nids[0]);
+ if (IS_ERR(npage[0]))
+ return PTR_ERR(npage[0]);
+ }
+
+ /* if inline_data is set, should not report any block indices */
+ if (f2fs_has_inline_data(dn->inode) && index) {
+ err = -EINVAL;
+ f2fs_put_page(npage[0], 1);
+ goto release_out;
+ }
+
+ parent = npage[0];
+ if (level != 0)
+ nids[1] = get_nid(parent, offset[0], true);
+ dn->inode_page = npage[0];
+ dn->inode_page_locked = true;
+
+ /* get indirect or direct nodes */
+ for (i = 1; i <= level; i++) {
+ bool done = false;
+
+ if (!nids[i] && mode == ALLOC_NODE) {
+ /* alloc new node */
+ if (!alloc_nid(sbi, &(nids[i]))) {
+ err = -ENOSPC;
+ goto release_pages;
+ }
+
+ dn->nid = nids[i];
+ npage[i] = new_node_page(dn, noffset[i], NULL);
+ if (IS_ERR(npage[i])) {
+ alloc_nid_failed(sbi, nids[i]);
+ err = PTR_ERR(npage[i]);
+ goto release_pages;
+ }
+
+ set_nid(parent, offset[i - 1], nids[i], i == 1);
+ alloc_nid_done(sbi, nids[i]);
+ done = true;
+ } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
+ npage[i] = get_node_page_ra(parent, offset[i - 1]);
+ if (IS_ERR(npage[i])) {
+ err = PTR_ERR(npage[i]);
+ goto release_pages;
+ }
+ done = true;
+ }
+ if (i == 1) {
+ dn->inode_page_locked = false;
+ unlock_page(parent);
+ } else {
+ f2fs_put_page(parent, 1);
+ }
+
+ if (!done) {
+ npage[i] = get_node_page(sbi, nids[i]);
+ if (IS_ERR(npage[i])) {
+ err = PTR_ERR(npage[i]);
+ f2fs_put_page(npage[0], 0);
+ goto release_out;
+ }
+ }
+ if (i < level) {
+ parent = npage[i];
+ nids[i + 1] = get_nid(parent, offset[i], false);
+ }
+ }
+ dn->nid = nids[level];
+ dn->ofs_in_node = offset[level];
+ dn->node_page = npage[level];
+ dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
+ return 0;
+
+release_pages:
+ f2fs_put_page(parent, 1);
+ if (i > 1)
+ f2fs_put_page(npage[0], 0);
+release_out:
+ dn->inode_page = NULL;
+ dn->node_page = NULL;
+ return err;
+}
+
+static void truncate_node(struct dnode_of_data *dn)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct node_info ni;
+
+ get_node_info(sbi, dn->nid, &ni);
+ if (dn->inode->i_blocks == 0) {
+ f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
+ goto invalidate;
+ }
+ f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
+
+ /* Deallocate node address */
+ invalidate_blocks(sbi, ni.blk_addr);
+ dec_valid_node_count(sbi, dn->inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
+
+ if (dn->nid == dn->inode->i_ino) {
+ remove_orphan_inode(sbi, dn->nid);
+ dec_valid_inode_count(sbi);
+ } else {
+ sync_inode_page(dn);
+ }
+invalidate:
+ clear_node_page_dirty(dn->node_page);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+
+ f2fs_put_page(dn->node_page, 1);
+
+ invalidate_mapping_pages(NODE_MAPPING(sbi),
+ dn->node_page->index, dn->node_page->index);
+
+ dn->node_page = NULL;
+ trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
+}
+
+static int truncate_dnode(struct dnode_of_data *dn)
+{
+ struct page *page;
+
+ if (dn->nid == 0)
+ return 1;
+
+ /* get direct node */
+ page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
+ return 1;
+ else if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ /* Make dnode_of_data for parameter */
+ dn->node_page = page;
+ dn->ofs_in_node = 0;
+ truncate_data_blocks(dn);
+ truncate_node(dn);
+ return 1;
+}
+
+static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
+ int ofs, int depth)
+{
+ struct dnode_of_data rdn = *dn;
+ struct page *page;
+ struct f2fs_node *rn;
+ nid_t child_nid;
+ unsigned int child_nofs;
+ int freed = 0;
+ int i, ret;
+
+ if (dn->nid == 0)
+ return NIDS_PER_BLOCK + 1;
+
+ trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
+
+ page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
+ if (IS_ERR(page)) {
+ trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
+ return PTR_ERR(page);
+ }
+
+ rn = F2FS_NODE(page);
+ if (depth < 3) {
+ for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
+ child_nid = le32_to_cpu(rn->in.nid[i]);
+ if (child_nid == 0)
+ continue;
+ rdn.nid = child_nid;
+ ret = truncate_dnode(&rdn);
+ if (ret < 0)
+ goto out_err;
+ set_nid(page, i, 0, false);
+ }
+ } else {
+ child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
+ for (i = ofs; i < NIDS_PER_BLOCK; i++) {
+ child_nid = le32_to_cpu(rn->in.nid[i]);
+ if (child_nid == 0) {
+ child_nofs += NIDS_PER_BLOCK + 1;
+ continue;
+ }
+ rdn.nid = child_nid;
+ ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
+ if (ret == (NIDS_PER_BLOCK + 1)) {
+ set_nid(page, i, 0, false);
+ child_nofs += ret;
+ } else if (ret < 0 && ret != -ENOENT) {
+ goto out_err;
+ }
+ }
+ freed = child_nofs;
+ }
+
+ if (!ofs) {
+ /* remove current indirect node */
+ dn->node_page = page;
+ truncate_node(dn);
+ freed++;
+ } else {
+ f2fs_put_page(page, 1);
+ }
+ trace_f2fs_truncate_nodes_exit(dn->inode, freed);
+ return freed;
+
+out_err:
+ f2fs_put_page(page, 1);
+ trace_f2fs_truncate_nodes_exit(dn->inode, ret);
+ return ret;
+}
+
+static int truncate_partial_nodes(struct dnode_of_data *dn,
+ struct f2fs_inode *ri, int *offset, int depth)
+{
+ struct page *pages[2];
+ nid_t nid[3];
+ nid_t child_nid;
+ int err = 0;
+ int i;
+ int idx = depth - 2;
+
+ nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
+ if (!nid[0])
+ return 0;
+
+ /* get indirect nodes in the path */
+ for (i = 0; i < idx + 1; i++) {
+ /* reference count'll be increased */
+ pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
+ if (IS_ERR(pages[i])) {
+ err = PTR_ERR(pages[i]);
+ idx = i - 1;
+ goto fail;
+ }
+ nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
+ }
+
+ /* free direct nodes linked to a partial indirect node */
+ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
+ child_nid = get_nid(pages[idx], i, false);
+ if (!child_nid)
+ continue;
+ dn->nid = child_nid;
+ err = truncate_dnode(dn);
+ if (err < 0)
+ goto fail;
+ set_nid(pages[idx], i, 0, false);
+ }
+
+ if (offset[idx + 1] == 0) {
+ dn->node_page = pages[idx];
+ dn->nid = nid[idx];
+ truncate_node(dn);
+ } else {
+ f2fs_put_page(pages[idx], 1);
+ }
+ offset[idx]++;
+ offset[idx + 1] = 0;
+ idx--;
+fail:
+ for (i = idx; i >= 0; i--)
+ f2fs_put_page(pages[i], 1);
+
+ trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
+
+ return err;
+}
+
+/*
+ * All the block addresses of data and nodes should be nullified.
+ */
+int truncate_inode_blocks(struct inode *inode, pgoff_t from)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int err = 0, cont = 1;
+ int level, offset[4], noffset[4];
+ unsigned int nofs = 0;
+ struct f2fs_inode *ri;
+ struct dnode_of_data dn;
+ struct page *page;
+
+ trace_f2fs_truncate_inode_blocks_enter(inode, from);
+
+ level = get_node_path(F2FS_I(inode), from, offset, noffset);
+restart:
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page)) {
+ trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
+ return PTR_ERR(page);
+ }
+
+ set_new_dnode(&dn, inode, page, NULL, 0);
+ unlock_page(page);
+
+ ri = F2FS_INODE(page);
+ switch (level) {
+ case 0:
+ case 1:
+ nofs = noffset[1];
+ break;
+ case 2:
+ nofs = noffset[1];
+ if (!offset[level - 1])
+ goto skip_partial;
+ err = truncate_partial_nodes(&dn, ri, offset, level);
+ if (err < 0 && err != -ENOENT)
+ goto fail;
+ nofs += 1 + NIDS_PER_BLOCK;
+ break;
+ case 3:
+ nofs = 5 + 2 * NIDS_PER_BLOCK;
+ if (!offset[level - 1])
+ goto skip_partial;
+ err = truncate_partial_nodes(&dn, ri, offset, level);
+ if (err < 0 && err != -ENOENT)
+ goto fail;
+ break;
+ default:
+ BUG();
+ }
+
+skip_partial:
+ while (cont) {
+ dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
+ switch (offset[0]) {
+ case NODE_DIR1_BLOCK:
+ case NODE_DIR2_BLOCK:
+ err = truncate_dnode(&dn);
+ break;
+
+ case NODE_IND1_BLOCK:
+ case NODE_IND2_BLOCK:
+ err = truncate_nodes(&dn, nofs, offset[1], 2);
+ break;
+
+ case NODE_DIND_BLOCK:
+ err = truncate_nodes(&dn, nofs, offset[1], 3);
+ cont = 0;
+ break;
+
+ default:
+ BUG();
+ }
+ if (err < 0 && err != -ENOENT)
+ goto fail;
+ if (offset[1] == 0 &&
+ ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
+ lock_page(page);
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+ f2fs_put_page(page, 1);
+ goto restart;
+ }
+ f2fs_wait_on_page_writeback(page, NODE);
+ ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
+ set_page_dirty(page);
+ unlock_page(page);
+ }
+ offset[1] = 0;
+ offset[0]++;
+ nofs += err;
+ }
+fail:
+ f2fs_put_page(page, 0);
+ trace_f2fs_truncate_inode_blocks_exit(inode, err);
+ return err > 0 ? 0 : err;
+}
+
+int truncate_xattr_node(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ nid_t nid = F2FS_I(inode)->i_xattr_nid;
+ struct dnode_of_data dn;
+ struct page *npage;
+
+ if (!nid)
+ return 0;
+
+ npage = get_node_page(sbi, nid);
+ if (IS_ERR(npage))
+ return PTR_ERR(npage);
+
+ F2FS_I(inode)->i_xattr_nid = 0;
+
+ /* need to do checkpoint during fsync */
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
+
+ set_new_dnode(&dn, inode, page, npage, nid);
+
+ if (page)
+ dn.inode_page_locked = true;
+ truncate_node(&dn);
+ return 0;
+}
+
+/*
+ * Caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ */
+void remove_inode_page(struct inode *inode)
+{
+ struct dnode_of_data dn;
+
+ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
+ if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
+ return;
+
+ if (truncate_xattr_node(inode, dn.inode_page)) {
+ f2fs_put_dnode(&dn);
+ return;
+ }
+
+ /* remove potential inline_data blocks */
+ if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode))
+ truncate_data_blocks_range(&dn, 1);
+
+ /* 0 is possible, after f2fs_new_inode() has failed */
+ f2fs_bug_on(F2FS_I_SB(inode),
+ inode->i_blocks != 0 && inode->i_blocks != 1);
+
+ /* will put inode & node pages */
+ truncate_node(&dn);
+}
+
+struct page *new_inode_page(struct inode *inode)
+{
+ struct dnode_of_data dn;
+
+ /* allocate inode page for new inode */
+ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
+
+ /* caller should f2fs_put_page(page, 1); */
+ return new_node_page(&dn, 0, NULL);
+}
+
+struct page *new_node_page(struct dnode_of_data *dn,
+ unsigned int ofs, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct node_info old_ni, new_ni;
+ struct page *page;
+ int err;
+
+ if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+ return ERR_PTR(-EPERM);
+
+ page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
+ err = -ENOSPC;
+ goto fail;
+ }
+
+ get_node_info(sbi, dn->nid, &old_ni);
+
+ /* Reinitialize old_ni with new node page */
+ f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
+ new_ni = old_ni;
+ new_ni.ino = dn->inode->i_ino;
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
+
+ f2fs_wait_on_page_writeback(page, NODE);
+ fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
+ set_cold_node(dn->inode, page);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ if (f2fs_has_xattr_block(ofs))
+ F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
+
+ dn->node_page = page;
+ if (ipage)
+ update_inode(dn->inode, ipage);
+ else
+ sync_inode_page(dn);
+ if (ofs == 0)
+ inc_valid_inode_count(sbi);
+
+ return page;
+
+fail:
+ clear_node_page_dirty(page);
+ f2fs_put_page(page, 1);
+ return ERR_PTR(err);
+}
+
+/*
+ * Caller should do after getting the following values.
+ * 0: f2fs_put_page(page, 0)
+ * LOCKED_PAGE: f2fs_put_page(page, 1)
+ * error: nothing
+ */
+static int read_node_page(struct page *page, int rw)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ struct node_info ni;
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = rw,
+ };
+
+ get_node_info(sbi, page->index, &ni);
+
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ f2fs_put_page(page, 1);
+ return -ENOENT;
+ }
+
+ if (PageUptodate(page))
+ return LOCKED_PAGE;
+
+ fio.blk_addr = ni.blk_addr;
+ return f2fs_submit_page_bio(sbi, page, &fio);
+}
+
+/*
+ * Readahead a node page
+ */
+void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct page *apage;
+ int err;
+
+ apage = find_get_page(NODE_MAPPING(sbi), nid);
+ if (apage && PageUptodate(apage)) {
+ f2fs_put_page(apage, 0);
+ return;
+ }
+ f2fs_put_page(apage, 0);
+
+ apage = grab_cache_page(NODE_MAPPING(sbi), nid);
+ if (!apage)
+ return;
+
+ err = read_node_page(apage, READA);
+ if (err == 0)
+ f2fs_put_page(apage, 0);
+ else if (err == LOCKED_PAGE)
+ f2fs_put_page(apage, 1);
+}
+
+struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
+{
+ struct page *page;
+ int err;
+repeat:
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ err = read_node_page(page, READ_SYNC);
+ if (err < 0)
+ return ERR_PTR(err);
+ else if (err != LOCKED_PAGE)
+ lock_page(page);
+
+ if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
+ ClearPageUptodate(page);
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+ mark_page_accessed(page);
+ return page;
+}
+
+/*
+ * Return a locked page for the desired node page.
+ * And, readahead MAX_RA_NODE number of node pages.
+ */
+struct page *get_node_page_ra(struct page *parent, int start)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
+ struct blk_plug plug;
+ struct page *page;
+ int err, i, end;
+ nid_t nid;
+
+ /* First, try getting the desired direct node. */
+ nid = get_nid(parent, start, false);
+ if (!nid)
+ return ERR_PTR(-ENOENT);
+repeat:
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ err = read_node_page(page, READ_SYNC);
+ if (err < 0)
+ return ERR_PTR(err);
+ else if (err == LOCKED_PAGE)
+ goto page_hit;
+
+ blk_start_plug(&plug);
+
+ /* Then, try readahead for siblings of the desired node */
+ end = start + MAX_RA_NODE;
+ end = min(end, NIDS_PER_BLOCK);
+ for (i = start + 1; i < end; i++) {
+ nid = get_nid(parent, i, false);
+ if (!nid)
+ continue;
+ ra_node_page(sbi, nid);
+ }
+
+ blk_finish_plug(&plug);
+
+ lock_page(page);
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+page_hit:
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
+ mark_page_accessed(page);
+ return page;
+}
+
+void sync_inode_page(struct dnode_of_data *dn)
+{
+ if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
+ update_inode(dn->inode, dn->node_page);
+ } else if (dn->inode_page) {
+ if (!dn->inode_page_locked)
+ lock_page(dn->inode_page);
+ update_inode(dn->inode, dn->inode_page);
+ if (!dn->inode_page_locked)
+ unlock_page(dn->inode_page);
+ } else {
+ update_inode_page(dn->inode);
+ }
+}
+
+int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
+ struct writeback_control *wbc)
+{
+ pgoff_t index, end;
+ struct pagevec pvec;
+ int step = ino ? 2 : 0;
+ int nwritten = 0, wrote = 0;
+
+ pagevec_init(&pvec, 0);
+
+next_step:
+ index = 0;
+ end = LONG_MAX;
+
+ while (index <= end) {
+ int i, nr_pages;
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (nr_pages == 0)
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /*
+ * flushing sequence with step:
+ * 0. indirect nodes
+ * 1. dentry dnodes
+ * 2. file dnodes
+ */
+ if (step == 0 && IS_DNODE(page))
+ continue;
+ if (step == 1 && (!IS_DNODE(page) ||
+ is_cold_node(page)))
+ continue;
+ if (step == 2 && (!IS_DNODE(page) ||
+ !is_cold_node(page)))
+ continue;
+
+ /*
+ * If an fsync mode,
+ * we should not skip writing node pages.
+ */
+ if (ino && ino_of_node(page) == ino)
+ lock_page(page);
+ else if (!trylock_page(page))
+ continue;
+
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+ if (ino && ino_of_node(page) != ino)
+ goto continue_unlock;
+
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;
+
+ /* called by fsync() */
+ if (ino && IS_DNODE(page)) {
+ set_fsync_mark(page, 1);
+ if (IS_INODE(page)) {
+ if (!is_checkpointed_node(sbi, ino) &&
+ !has_fsynced_inode(sbi, ino))
+ set_dentry_mark(page, 1);
+ else
+ set_dentry_mark(page, 0);
+ }
+ nwritten++;
+ } else {
+ set_fsync_mark(page, 0);
+ set_dentry_mark(page, 0);
+ }
+
+ if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
+ unlock_page(page);
+ else
+ wrote++;
+
+ if (--wbc->nr_to_write == 0)
+ break;
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+
+ if (wbc->nr_to_write == 0) {
+ step = 2;
+ break;
+ }
+ }
+
+ if (step < 2) {
+ step++;
+ goto next_step;
+ }
+
+ if (wrote)
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ return nwritten;
+}
+
+int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ pgoff_t index = 0, end = LONG_MAX;
+ struct pagevec pvec;
+ int ret2 = 0, ret = 0;
+
+ pagevec_init(&pvec, 0);
+
+ while (index <= end) {
+ int i, nr_pages;
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_WRITEBACK,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (nr_pages == 0)
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /* until radix tree lookup accepts end_index */
+ if (unlikely(page->index > end))
+ continue;
+
+ if (ino && ino_of_node(page) == ino) {
+ f2fs_wait_on_page_writeback(page, NODE);
+ if (TestClearPageError(page))
+ ret = -EIO;
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+
+ if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
+ ret2 = -ENOSPC;
+ if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
+ ret2 = -EIO;
+ if (!ret)
+ ret = ret2;
+ return ret;
+}
+
+static int f2fs_write_node_page(struct page *page,
+ struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ nid_t nid;
+ struct node_info ni;
+ struct f2fs_io_info fio = {
+ .type = NODE,
+ .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ };
+
+ trace_f2fs_writepage(page, NODE);
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto redirty_out;
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
+
+ f2fs_wait_on_page_writeback(page, NODE);
+
+ /* get old block addr of this node page */
+ nid = nid_of_node(page);
+ f2fs_bug_on(sbi, page->index != nid);
+
+ get_node_info(sbi, nid, &ni);
+
+ /* This page is already truncated */
+ if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ unlock_page(page);
+ return 0;
+ }
+
+ if (wbc->for_reclaim) {
+ if (!down_read_trylock(&sbi->node_write))
+ goto redirty_out;
+ } else {
+ down_read(&sbi->node_write);
+ }
+
+ set_page_writeback(page);
+ fio.blk_addr = ni.blk_addr;
+ write_node_page(sbi, page, nid, &fio);
+ set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
+ dec_page_count(sbi, F2FS_DIRTY_NODES);
+ up_read(&sbi->node_write);
+ unlock_page(page);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+
+ return 0;
+
+redirty_out:
+ redirty_page_for_writepage(wbc, page);
+ return AOP_WRITEPAGE_ACTIVATE;
+}
+
+static int f2fs_write_node_pages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ long diff;
+
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
+ /* balancing f2fs's metadata in background */
+ f2fs_balance_fs_bg(sbi);
+
+ /* collect a number of dirty node pages and write together */
+ if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ goto skip_write;
+
+ diff = nr_pages_to_write(sbi, NODE, wbc);
+ wbc->sync_mode = WB_SYNC_NONE;
+ sync_node_pages(sbi, 0, wbc);
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
+ return 0;
+}
+
+static int f2fs_set_node_page_dirty(struct page *page)
+{
+ trace_f2fs_set_page_dirty(page, NODE);
+
+ SetPageUptodate(page);
+ if (!PageDirty(page)) {
+ __set_page_dirty_nobuffers(page);
+ inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
+ SetPagePrivate(page);
+ f2fs_trace_pid(page);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Structure of the f2fs node operations
+ */
+const struct address_space_operations f2fs_node_aops = {
+ .writepage = f2fs_write_node_page,
+ .writepages = f2fs_write_node_pages,
+ .set_page_dirty = f2fs_set_node_page_dirty,
+ .invalidatepage = f2fs_invalidate_page,
+ .releasepage = f2fs_release_page,
+};
+
+static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
+ nid_t n)
+{
+ return radix_tree_lookup(&nm_i->free_nid_root, n);
+}
+
+static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
+ struct free_nid *i)
+{
+ list_del(&i->list);
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
+}
+
+static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *i;
+ struct nat_entry *ne;
+ bool allocated = false;
+
+ if (!available_free_memory(sbi, FREE_NIDS))
+ return -1;
+
+ /* 0 nid should not be used */
+ if (unlikely(nid == 0))
+ return 0;
+
+ if (build) {
+ /* do not add allocated nids */
+ down_read(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (ne &&
+ (!get_nat_flag(ne, IS_CHECKPOINTED) ||
+ nat_get_blkaddr(ne) != NULL_ADDR))
+ allocated = true;
+ up_read(&nm_i->nat_tree_lock);
+ if (allocated)
+ return 0;
+ }
+
+ i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
+ i->nid = nid;
+ i->state = NID_NEW;
+
+ if (radix_tree_preload(GFP_NOFS)) {
+ kmem_cache_free(free_nid_slab, i);
+ return 0;
+ }
+
+ spin_lock(&nm_i->free_nid_list_lock);
+ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
+ spin_unlock(&nm_i->free_nid_list_lock);
+ radix_tree_preload_end();
+ kmem_cache_free(free_nid_slab, i);
+ return 0;
+ }
+ list_add_tail(&i->list, &nm_i->free_nid_list);
+ nm_i->fcnt++;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ radix_tree_preload_end();
+ return 1;
+}
+
+static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+{
+ struct free_nid *i;
+ bool need_free = false;
+
+ spin_lock(&nm_i->free_nid_list_lock);
+ i = __lookup_free_nid_list(nm_i, nid);
+ if (i && i->state == NID_NEW) {
+ __del_from_free_nid_list(nm_i, i);
+ nm_i->fcnt--;
+ need_free = true;
+ }
+ spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
+}
+
+static void scan_nat_page(struct f2fs_sb_info *sbi,
+ struct page *nat_page, nid_t start_nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct f2fs_nat_block *nat_blk = page_address(nat_page);
+ block_t blk_addr;
+ int i;
+
+ i = start_nid % NAT_ENTRY_PER_BLOCK;
+
+ for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
+
+ if (unlikely(start_nid >= nm_i->max_nid))
+ break;
+
+ blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
+ f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
+ if (blk_addr == NULL_ADDR) {
+ if (add_free_nid(sbi, start_nid, true) < 0)
+ break;
+ }
+ }
+}
+
+static void build_free_nids(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ int i = 0;
+ nid_t nid = nm_i->next_scan_nid;
+
+ /* Enough entries */
+ if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
+ return;
+
+ /* readahead nat pages to be scanned */
+ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
+
+ while (1) {
+ struct page *page = get_current_nat_page(sbi, nid);
+
+ scan_nat_page(sbi, page, nid);
+ f2fs_put_page(page, 1);
+
+ nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
+ if (unlikely(nid >= nm_i->max_nid))
+ nid = 0;
+
+ if (i++ == FREE_NID_PAGES)
+ break;
+ }
+
+ /* go to the next free nat pages to find free nids abundantly */
+ nm_i->next_scan_nid = nid;
+
+ /* find free nids from current sum_pages */
+ mutex_lock(&curseg->curseg_mutex);
+ for (i = 0; i < nats_in_cursum(sum); i++) {
+ block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
+ nid = le32_to_cpu(nid_in_journal(sum, i));
+ if (addr == NULL_ADDR)
+ add_free_nid(sbi, nid, true);
+ else
+ remove_free_nid(nm_i, nid);
+ }
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+/*
+ * If this function returns success, caller can obtain a new nid
+ * from second parameter of this function.
+ * The returned nid could be used ino as well as nid when inode is created.
+ */
+bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *i = NULL;
+retry:
+ if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
+ return false;
+
+ spin_lock(&nm_i->free_nid_list_lock);
+
+ /* We should not use stale free nids created by build_free_nids */
+ if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
+ list_for_each_entry(i, &nm_i->free_nid_list, list)
+ if (i->state == NID_NEW)
+ break;
+
+ f2fs_bug_on(sbi, i->state != NID_NEW);
+ *nid = i->nid;
+ i->state = NID_ALLOC;
+ nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ return true;
+ }
+ spin_unlock(&nm_i->free_nid_list_lock);
+
+ /* Let's scan nat pages and its caches to get free nids */
+ mutex_lock(&nm_i->build_lock);
+ build_free_nids(sbi);
+ mutex_unlock(&nm_i->build_lock);
+ goto retry;
+}
+
+/*
+ * alloc_nid() should be called prior to this function.
+ */
+void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *i;
+
+ spin_lock(&nm_i->free_nid_list_lock);
+ i = __lookup_free_nid_list(nm_i, nid);
+ f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
+ __del_from_free_nid_list(nm_i, i);
+ spin_unlock(&nm_i->free_nid_list_lock);
+
+ kmem_cache_free(free_nid_slab, i);
+}
+
+/*
+ * alloc_nid() should be called prior to this function.
+ */
+void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *i;
+ bool need_free = false;
+
+ if (!nid)
+ return;
+
+ spin_lock(&nm_i->free_nid_list_lock);
+ i = __lookup_free_nid_list(nm_i, nid);
+ f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
+ if (!available_free_memory(sbi, FREE_NIDS)) {
+ __del_from_free_nid_list(nm_i, i);
+ need_free = true;
+ } else {
+ i->state = NID_NEW;
+ nm_i->fcnt++;
+ }
+ spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
+}
+
+void recover_inline_xattr(struct inode *inode, struct page *page)
+{
+ void *src_addr, *dst_addr;
+ size_t inline_size;
+ struct page *ipage;
+ struct f2fs_inode *ri;
+
+ ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
+
+ ri = F2FS_INODE(page);
+ if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
+ goto update_inode;
+ }
+
+ dst_addr = inline_xattr_addr(ipage);
+ src_addr = inline_xattr_addr(page);
+ inline_size = inline_xattr_size(inode);
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ memcpy(dst_addr, src_addr, inline_size);
+update_inode:
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+}
+
+void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
+ nid_t new_xnid = nid_of_node(page);
+ struct node_info ni;
+
+ /* 1: invalidate the previous xattr nid */
+ if (!prev_xnid)
+ goto recover_xnid;
+
+ /* Deallocate node address */
+ get_node_info(sbi, prev_xnid, &ni);
+ f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
+ invalidate_blocks(sbi, ni.blk_addr);
+ dec_valid_node_count(sbi, inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
+
+recover_xnid:
+ /* 2: allocate new xattr nid */
+ if (unlikely(!inc_valid_node_count(sbi, inode)))
+ f2fs_bug_on(sbi, 1);
+
+ remove_free_nid(NM_I(sbi), new_xnid);
+ get_node_info(sbi, new_xnid, &ni);
+ ni.ino = inode->i_ino;
+ set_node_addr(sbi, &ni, NEW_ADDR, false);
+ F2FS_I(inode)->i_xattr_nid = new_xnid;
+
+ /* 3: update xattr blkaddr */
+ refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
+ set_node_addr(sbi, &ni, blkaddr, false);
+
+ update_inode_page(inode);
+}
+
+int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct f2fs_inode *src, *dst;
+ nid_t ino = ino_of_node(page);
+ struct node_info old_ni, new_ni;
+ struct page *ipage;
+
+ get_node_info(sbi, ino, &old_ni);
+
+ if (unlikely(old_ni.blk_addr != NULL_ADDR))
+ return -EINVAL;
+
+ ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
+ if (!ipage)
+ return -ENOMEM;
+
+ /* Should not use this inode from free nid list */
+ remove_free_nid(NM_I(sbi), ino);
+
+ SetPageUptodate(ipage);
+ fill_node_footer(ipage, ino, ino, 0, true);
+
+ src = F2FS_INODE(page);
+ dst = F2FS_INODE(ipage);
+
+ memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
+ dst->i_size = 0;
+ dst->i_blocks = cpu_to_le64(1);
+ dst->i_links = cpu_to_le32(1);
+ dst->i_xattr_nid = 0;
+ dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
+
+ new_ni = old_ni;
+ new_ni.ino = ino;
+
+ if (unlikely(!inc_valid_node_count(sbi, NULL)))
+ WARN_ON(1);
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
+ inc_valid_inode_count(sbi);
+ set_page_dirty(ipage);
+ f2fs_put_page(ipage, 1);
+ return 0;
+}
+
+int restore_node_summary(struct f2fs_sb_info *sbi,
+ unsigned int segno, struct f2fs_summary_block *sum)
+{
+ struct f2fs_node *rn;
+ struct f2fs_summary *sum_entry;
+ block_t addr;
+ int bio_blocks = MAX_BIO_BLOCKS(sbi);
+ int i, idx, last_offset, nrpages;
+
+ /* scan the node segment */
+ last_offset = sbi->blocks_per_seg;
+ addr = START_BLOCK(sbi, segno);
+ sum_entry = &sum->entries[0];
+
+ for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
+ nrpages = min(last_offset - i, bio_blocks);
+
+ /* readahead node pages */
+ ra_meta_pages(sbi, addr, nrpages, META_POR);
+
+ for (idx = addr; idx < addr + nrpages; idx++) {
+ struct page *page = get_meta_page(sbi, idx);
+
+ rn = F2FS_NODE(page);
+ sum_entry->nid = rn->footer.nid;
+ sum_entry->version = 0;
+ sum_entry->ofs_in_node = 0;
+ sum_entry++;
+ f2fs_put_page(page, 1);
+ }
+
+ invalidate_mapping_pages(META_MAPPING(sbi), addr,
+ addr + nrpages);
+ }
+ return 0;
+}
+
+static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ int i;
+
+ mutex_lock(&curseg->curseg_mutex);
+ for (i = 0; i < nats_in_cursum(sum); i++) {
+ struct nat_entry *ne;
+ struct f2fs_nat_entry raw_ne;
+ nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
+
+ raw_ne = nat_in_journal(sum, i);
+
+ down_write(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
+ if (!ne) {
+ ne = grab_nat_entry(nm_i, nid);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
+ }
+ __set_nat_cache_dirty(nm_i, ne);
+ up_write(&nm_i->nat_tree_lock);
+ }
+ update_nats_in_cursum(sum, -i);
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+static void __adjust_nat_entry_set(struct nat_entry_set *nes,
+ struct list_head *head, int max)
+{
+ struct nat_entry_set *cur;
+
+ if (nes->entry_cnt >= max)
+ goto add_out;
+
+ list_for_each_entry(cur, head, set_list) {
+ if (cur->entry_cnt >= nes->entry_cnt) {
+ list_add(&nes->set_list, cur->set_list.prev);
+ return;
+ }
+ }
+add_out:
+ list_add_tail(&nes->set_list, head);
+}
+
+static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
+ struct nat_entry_set *set)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
+ bool to_journal = true;
+ struct f2fs_nat_block *nat_blk;
+ struct nat_entry *ne, *cur;
+ struct page *page = NULL;
+
+ /*
+ * there are two steps to flush nat entries:
+ * #1, flush nat entries to journal in current hot data summary block.
+ * #2, flush nat entries to nat page.
+ */
+ if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
+ to_journal = false;
+
+ if (to_journal) {
+ mutex_lock(&curseg->curseg_mutex);
+ } else {
+ page = get_next_nat_page(sbi, start_nid);
+ nat_blk = page_address(page);
+ f2fs_bug_on(sbi, !nat_blk);
+ }
+
+ /* flush dirty nats in nat entry set */
+ list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
+ struct f2fs_nat_entry *raw_ne;
+ nid_t nid = nat_get_nid(ne);
+ int offset;
+
+ if (nat_get_blkaddr(ne) == NEW_ADDR)
+ continue;
+
+ if (to_journal) {
+ offset = lookup_journal_in_cursum(sum,
+ NAT_JOURNAL, nid, 1);
+ f2fs_bug_on(sbi, offset < 0);
+ raw_ne = &nat_in_journal(sum, offset);
+ nid_in_journal(sum, offset) = cpu_to_le32(nid);
+ } else {
+ raw_ne = &nat_blk->entries[nid - start_nid];
+ }
+ raw_nat_from_node_info(raw_ne, &ne->ni);
+
+ down_write(&NM_I(sbi)->nat_tree_lock);
+ nat_reset_flag(ne);
+ __clear_nat_cache_dirty(NM_I(sbi), ne);
+ up_write(&NM_I(sbi)->nat_tree_lock);
+
+ if (nat_get_blkaddr(ne) == NULL_ADDR)
+ add_free_nid(sbi, nid, false);
+ }
+
+ if (to_journal)
+ mutex_unlock(&curseg->curseg_mutex);
+ else
+ f2fs_put_page(page, 1);
+
+ f2fs_bug_on(sbi, set->entry_cnt);
+
+ radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+ kmem_cache_free(nat_entry_set_slab, set);
+}
+
+/*
+ * This function is called during the checkpointing process.
+ */
+void flush_nat_entries(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct nat_entry_set *setvec[SETVEC_SIZE];
+ struct nat_entry_set *set, *tmp;
+ unsigned int found;
+ nid_t set_idx = 0;
+ LIST_HEAD(sets);
+
+ if (!nm_i->dirty_nat_cnt)
+ return;
+ /*
+ * if there are no enough space in journal to store dirty nat
+ * entries, remove all entries from journal and merge them
+ * into nat entry set.
+ */
+ if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ remove_nats_in_journal(sbi);
+
+ while ((found = __gang_lookup_nat_set(nm_i,
+ set_idx, SETVEC_SIZE, setvec))) {
+ unsigned idx;
+ set_idx = setvec[found - 1]->set + 1;
+ for (idx = 0; idx < found; idx++)
+ __adjust_nat_entry_set(setvec[idx], &sets,
+ MAX_NAT_JENTRIES(sum));
+ }
+
+ /* flush dirty nats in nat entry set */
+ list_for_each_entry_safe(set, tmp, &sets, set_list)
+ __flush_nat_entry_set(sbi, set);
+
+ f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
+}
+
+static int init_node_manager(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned char *version_bitmap;
+ unsigned int nat_segs, nat_blocks;
+
+ nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
+
+ /* segment_count_nat includes pair segment so divide to 2. */
+ nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
+ nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
+ /* not used nids: 0, node, meta, (and root counted as valid node) */
+ nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
+ nm_i->fcnt = 0;
+ nm_i->nat_cnt = 0;
+ nm_i->ram_thresh = DEF_RAM_THRESHOLD;
+
+ INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
+ INIT_LIST_HEAD(&nm_i->free_nid_list);
+ INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
+ INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
+ INIT_LIST_HEAD(&nm_i->nat_entries);
+
+ mutex_init(&nm_i->build_lock);
+ spin_lock_init(&nm_i->free_nid_list_lock);
+ init_rwsem(&nm_i->nat_tree_lock);
+
+ nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
+ nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
+ version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
+ if (!version_bitmap)
+ return -EFAULT;
+
+ nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
+ GFP_KERNEL);
+ if (!nm_i->nat_bitmap)
+ return -ENOMEM;
+ return 0;
+}
+
+int build_node_manager(struct f2fs_sb_info *sbi)
+{
+ int err;
+
+ sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
+ if (!sbi->nm_info)
+ return -ENOMEM;
+
+ err = init_node_manager(sbi);
+ if (err)
+ return err;
+
+ build_free_nids(sbi);
+ return 0;
+}
+
+void destroy_node_manager(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *i, *next_i;
+ struct nat_entry *natvec[NATVEC_SIZE];
+ struct nat_entry_set *setvec[SETVEC_SIZE];
+ nid_t nid = 0;
+ unsigned int found;
+
+ if (!nm_i)
+ return;
+
+ /* destroy free nid list */
+ spin_lock(&nm_i->free_nid_list_lock);
+ list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
+ f2fs_bug_on(sbi, i->state == NID_ALLOC);
+ __del_from_free_nid_list(nm_i, i);
+ nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ kmem_cache_free(free_nid_slab, i);
+ spin_lock(&nm_i->free_nid_list_lock);
+ }
+ f2fs_bug_on(sbi, nm_i->fcnt);
+ spin_unlock(&nm_i->free_nid_list_lock);
+
+ /* destroy nat cache */
+ down_write(&nm_i->nat_tree_lock);
+ while ((found = __gang_lookup_nat_cache(nm_i,
+ nid, NATVEC_SIZE, natvec))) {
+ unsigned idx;
+
+ nid = nat_get_nid(natvec[found - 1]) + 1;
+ for (idx = 0; idx < found; idx++)
+ __del_from_nat_cache(nm_i, natvec[idx]);
+ }
+ f2fs_bug_on(sbi, nm_i->nat_cnt);
+
+ /* destroy nat set cache */
+ nid = 0;
+ while ((found = __gang_lookup_nat_set(nm_i,
+ nid, SETVEC_SIZE, setvec))) {
+ unsigned idx;
+
+ nid = setvec[found - 1]->set + 1;
+ for (idx = 0; idx < found; idx++) {
+ /* entry_cnt is not zero, when cp_error was occurred */
+ f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
+ radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
+ kmem_cache_free(nat_entry_set_slab, setvec[idx]);
+ }
+ }
+ up_write(&nm_i->nat_tree_lock);
+
+ kfree(nm_i->nat_bitmap);
+ sbi->nm_info = NULL;
+ kfree(nm_i);
+}
+
+int __init create_node_manager_caches(void)
+{
+ nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
+ sizeof(struct nat_entry));
+ if (!nat_entry_slab)
+ goto fail;
+
+ free_nid_slab = f2fs_kmem_cache_create("free_nid",
+ sizeof(struct free_nid));
+ if (!free_nid_slab)
+ goto destroy_nat_entry;
+
+ nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+ sizeof(struct nat_entry_set));
+ if (!nat_entry_set_slab)
+ goto destroy_free_nid;
+ return 0;
+
+destroy_free_nid:
+ kmem_cache_destroy(free_nid_slab);
+destroy_nat_entry:
+ kmem_cache_destroy(nat_entry_slab);
+fail:
+ return -ENOMEM;
+}
+
+void destroy_node_manager_caches(void)
+{
+ kmem_cache_destroy(nat_entry_set_slab);
+ kmem_cache_destroy(free_nid_slab);
+ kmem_cache_destroy(nat_entry_slab);
+}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
new file mode 100644
index 0000000..f405bbf
--- /dev/null
+++ b/fs/f2fs/node.h
@@ -0,0 +1,415 @@
+/*
+ * fs/f2fs/node.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/* start node id of a node block dedicated to the given node id */
+#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
+
+/* node block offset on the NAT area dedicated to the given start node id */
+#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
+
+/* # of pages to perform readahead before building free nids */
+#define FREE_NID_PAGES 4
+
+/* maximum readahead size for node during getting data blocks */
+#define MAX_RA_NODE 128
+
+/* control the memory footprint threshold (10MB per 1GB ram) */
+#define DEF_RAM_THRESHOLD 10
+
+/* vector size for gang look-up from nat cache that consists of radix tree */
+#define NATVEC_SIZE 64
+#define SETVEC_SIZE 32
+
+/* return value for read_node_page */
+#define LOCKED_PAGE 1
+
+/* For flag in struct node_info */
+enum {
+ IS_CHECKPOINTED, /* is it checkpointed before? */
+ HAS_FSYNCED_INODE, /* is the inode fsynced before? */
+ HAS_LAST_FSYNC, /* has the latest node fsync mark? */
+ IS_DIRTY, /* this nat entry is dirty? */
+};
+
+/*
+ * For node information
+ */
+struct node_info {
+ nid_t nid; /* node id */
+ nid_t ino; /* inode number of the node's owner */
+ block_t blk_addr; /* block address of the node */
+ unsigned char version; /* version of the node */
+ unsigned char flag; /* for node information bits */
+};
+
+struct nat_entry {
+ struct list_head list; /* for clean or dirty nat list */
+ struct node_info ni; /* in-memory node information */
+};
+
+#define nat_get_nid(nat) (nat->ni.nid)
+#define nat_set_nid(nat, n) (nat->ni.nid = n)
+#define nat_get_blkaddr(nat) (nat->ni.blk_addr)
+#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b)
+#define nat_get_ino(nat) (nat->ni.ino)
+#define nat_set_ino(nat, i) (nat->ni.ino = i)
+#define nat_get_version(nat) (nat->ni.version)
+#define nat_set_version(nat, v) (nat->ni.version = v)
+
+#define inc_node_version(version) (++version)
+
+static inline void copy_node_info(struct node_info *dst,
+ struct node_info *src)
+{
+ dst->nid = src->nid;
+ dst->ino = src->ino;
+ dst->blk_addr = src->blk_addr;
+ dst->version = src->version;
+ /* should not copy flag here */
+}
+
+static inline void set_nat_flag(struct nat_entry *ne,
+ unsigned int type, bool set)
+{
+ unsigned char mask = 0x01 << type;
+ if (set)
+ ne->ni.flag |= mask;
+ else
+ ne->ni.flag &= ~mask;
+}
+
+static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
+{
+ unsigned char mask = 0x01 << type;
+ return ne->ni.flag & mask;
+}
+
+static inline void nat_reset_flag(struct nat_entry *ne)
+{
+ /* these states can be set only after checkpoint was done */
+ set_nat_flag(ne, IS_CHECKPOINTED, true);
+ set_nat_flag(ne, HAS_FSYNCED_INODE, false);
+ set_nat_flag(ne, HAS_LAST_FSYNC, true);
+}
+
+static inline void node_info_from_raw_nat(struct node_info *ni,
+ struct f2fs_nat_entry *raw_ne)
+{
+ ni->ino = le32_to_cpu(raw_ne->ino);
+ ni->blk_addr = le32_to_cpu(raw_ne->block_addr);
+ ni->version = raw_ne->version;
+}
+
+static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
+ struct node_info *ni)
+{
+ raw_ne->ino = cpu_to_le32(ni->ino);
+ raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
+ raw_ne->version = ni->version;
+}
+
+enum mem_type {
+ FREE_NIDS, /* indicates the free nid list */
+ NAT_ENTRIES, /* indicates the cached nat entry */
+ DIRTY_DENTS, /* indicates dirty dentry pages */
+ INO_ENTRIES, /* indicates inode entries */
+ BASE_CHECK, /* check kernel status */
+};
+
+struct nat_entry_set {
+ struct list_head set_list; /* link with other nat sets */
+ struct list_head entry_list; /* link with dirty nat entries */
+ nid_t set; /* set number*/
+ unsigned int entry_cnt; /* the # of nat entries in set */
+};
+
+/*
+ * For free nid mangement
+ */
+enum nid_state {
+ NID_NEW, /* newly added to free nid list */
+ NID_ALLOC /* it is allocated */
+};
+
+struct free_nid {
+ struct list_head list; /* for free node id list */
+ nid_t nid; /* node id */
+ int state; /* in use or not: NID_NEW or NID_ALLOC */
+};
+
+static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct free_nid *fnid;
+
+ spin_lock(&nm_i->free_nid_list_lock);
+ if (nm_i->fcnt <= 0) {
+ spin_unlock(&nm_i->free_nid_list_lock);
+ return;
+ }
+ fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
+ *nid = fnid->nid;
+ spin_unlock(&nm_i->free_nid_list_lock);
+}
+
+/*
+ * inline functions
+ */
+static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
+}
+
+static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ pgoff_t block_off;
+ pgoff_t block_addr;
+ int seg_off;
+
+ block_off = NAT_BLOCK_OFFSET(start);
+ seg_off = block_off >> sbi->log_blocks_per_seg;
+
+ block_addr = (pgoff_t)(nm_i->nat_blkaddr +
+ (seg_off << sbi->log_blocks_per_seg << 1) +
+ (block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
+
+ if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
+ block_addr += sbi->blocks_per_seg;
+
+ return block_addr;
+}
+
+static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
+ pgoff_t block_addr)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ block_addr -= nm_i->nat_blkaddr;
+ if ((block_addr >> sbi->log_blocks_per_seg) % 2)
+ block_addr -= sbi->blocks_per_seg;
+ else
+ block_addr += sbi->blocks_per_seg;
+
+ return block_addr + nm_i->nat_blkaddr;
+}
+
+static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
+{
+ unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
+
+ f2fs_change_bit(block_off, nm_i->nat_bitmap);
+}
+
+static inline void fill_node_footer(struct page *page, nid_t nid,
+ nid_t ino, unsigned int ofs, bool reset)
+{
+ struct f2fs_node *rn = F2FS_NODE(page);
+ unsigned int old_flag = 0;
+
+ if (reset)
+ memset(rn, 0, sizeof(*rn));
+ else
+ old_flag = le32_to_cpu(rn->footer.flag);
+
+ rn->footer.nid = cpu_to_le32(nid);
+ rn->footer.ino = cpu_to_le32(ino);
+
+ /* should remain old flag bits such as COLD_BIT_SHIFT */
+ rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) |
+ (old_flag & OFFSET_BIT_MASK));
+}
+
+static inline void copy_node_footer(struct page *dst, struct page *src)
+{
+ struct f2fs_node *src_rn = F2FS_NODE(src);
+ struct f2fs_node *dst_rn = F2FS_NODE(dst);
+ memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
+}
+
+static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
+ struct f2fs_node *rn = F2FS_NODE(page);
+
+ rn->footer.cp_ver = ckpt->checkpoint_ver;
+ rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
+}
+
+static inline nid_t ino_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.ino);
+}
+
+static inline nid_t nid_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.nid);
+}
+
+static inline unsigned int ofs_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ unsigned flag = le32_to_cpu(rn->footer.flag);
+ return flag >> OFFSET_BIT_SHIFT;
+}
+
+static inline unsigned long long cpver_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le64_to_cpu(rn->footer.cp_ver);
+}
+
+static inline block_t next_blkaddr_of_node(struct page *node_page)
+{
+ struct f2fs_node *rn = F2FS_NODE(node_page);
+ return le32_to_cpu(rn->footer.next_blkaddr);
+}
+
+/*
+ * f2fs assigns the following node offsets described as (num).
+ * N = NIDS_PER_BLOCK
+ *
+ * Inode block (0)
+ * |- direct node (1)
+ * |- direct node (2)
+ * |- indirect node (3)
+ * | `- direct node (4 => 4 + N - 1)
+ * |- indirect node (4 + N)
+ * | `- direct node (5 + N => 5 + 2N - 1)
+ * `- double indirect node (5 + 2N)
+ * `- indirect node (6 + 2N)
+ * `- direct node
+ * ......
+ * `- indirect node ((6 + 2N) + x(N + 1))
+ * `- direct node
+ * ......
+ * `- indirect node ((6 + 2N) + (N - 1)(N + 1))
+ * `- direct node
+ */
+static inline bool IS_DNODE(struct page *node_page)
+{
+ unsigned int ofs = ofs_of_node(node_page);
+
+ if (f2fs_has_xattr_block(ofs))
+ return false;
+
+ if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
+ ofs == 5 + 2 * NIDS_PER_BLOCK)
+ return false;
+ if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
+ ofs -= 6 + 2 * NIDS_PER_BLOCK;
+ if (!((long int)ofs % (NIDS_PER_BLOCK + 1)))
+ return false;
+ }
+ return true;
+}
+
+static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
+{
+ struct f2fs_node *rn = F2FS_NODE(p);
+
+ f2fs_wait_on_page_writeback(p, NODE);
+
+ if (i)
+ rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
+ else
+ rn->in.nid[off] = cpu_to_le32(nid);
+ set_page_dirty(p);
+}
+
+static inline nid_t get_nid(struct page *p, int off, bool i)
+{
+ struct f2fs_node *rn = F2FS_NODE(p);
+
+ if (i)
+ return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
+ return le32_to_cpu(rn->in.nid[off]);
+}
+
+/*
+ * Coldness identification:
+ * - Mark cold files in f2fs_inode_info
+ * - Mark cold node blocks in their node footer
+ * - Mark cold data pages in page cache
+ */
+static inline int is_file(struct inode *inode, int type)
+{
+ return F2FS_I(inode)->i_advise & type;
+}
+
+static inline void set_file(struct inode *inode, int type)
+{
+ F2FS_I(inode)->i_advise |= type;
+}
+
+static inline void clear_file(struct inode *inode, int type)
+{
+ F2FS_I(inode)->i_advise &= ~type;
+}
+
+#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
+#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
+#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
+#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT)
+
+static inline int is_cold_data(struct page *page)
+{
+ return PageChecked(page);
+}
+
+static inline void set_cold_data(struct page *page)
+{
+ SetPageChecked(page);
+}
+
+static inline void clear_cold_data(struct page *page)
+{
+ ClearPageChecked(page);
+}
+
+static inline int is_node(struct page *page, int type)
+{
+ struct f2fs_node *rn = F2FS_NODE(page);
+ return le32_to_cpu(rn->footer.flag) & (1 << type);
+}
+
+#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT)
+#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
+#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
+
+static inline void set_cold_node(struct inode *inode, struct page *page)
+{
+ struct f2fs_node *rn = F2FS_NODE(page);
+ unsigned int flag = le32_to_cpu(rn->footer.flag);
+
+ if (S_ISDIR(inode->i_mode))
+ flag &= ~(0x1 << COLD_BIT_SHIFT);
+ else
+ flag |= (0x1 << COLD_BIT_SHIFT);
+ rn->footer.flag = cpu_to_le32(flag);
+}
+
+static inline void set_mark(struct page *page, int mark, int type)
+{
+ struct f2fs_node *rn = F2FS_NODE(page);
+ unsigned int flag = le32_to_cpu(rn->footer.flag);
+ if (mark)
+ flag |= (0x1 << type);
+ else
+ flag &= ~(0x1 << type);
+ rn->footer.flag = cpu_to_le32(flag);
+}
+#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
+#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
new file mode 100644
index 0000000..01a4e0b
--- /dev/null
+++ b/fs/f2fs/recovery.c
@@ -0,0 +1,563 @@
+/*
+ * fs/f2fs/recovery.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+/*
+ * Roll forward recovery scenarios.
+ *
+ * [Term] F: fsync_mark, D: dentry_mark
+ *
+ * 1. inode(x) | CP | inode(x) | dnode(F)
+ * -> Update the latest inode(x).
+ *
+ * 2. inode(x) | CP | inode(F) | dnode(F)
+ * -> No problem.
+ *
+ * 3. inode(x) | CP | dnode(F) | inode(x)
+ * -> Recover to the latest dnode(F), and drop the last inode(x)
+ *
+ * 4. inode(x) | CP | dnode(F) | inode(F)
+ * -> No problem.
+ *
+ * 5. CP | inode(x) | dnode(F)
+ * -> The inode(DF) was missing. Should drop this dnode(F).
+ *
+ * 6. CP | inode(DF) | dnode(F)
+ * -> No problem.
+ *
+ * 7. CP | dnode(F) | inode(DF)
+ * -> If f2fs_iget fails, then goto next to find inode(DF).
+ *
+ * 8. CP | dnode(F) | inode(x)
+ * -> If f2fs_iget fails, then goto next to find inode(DF).
+ * But it will fail due to no inode(DF).
+ */
+
+static struct kmem_cache *fsync_entry_slab;
+
+bool space_for_roll_forward(struct f2fs_sb_info *sbi)
+{
+ if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
+ > sbi->user_block_count)
+ return false;
+ return true;
+}
+
+static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
+ nid_t ino)
+{
+ struct fsync_inode_entry *entry;
+
+ list_for_each_entry(entry, head, list)
+ if (entry->inode->i_ino == ino)
+ return entry;
+
+ return NULL;
+}
+
+static int recover_dentry(struct inode *inode, struct page *ipage)
+{
+ struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
+ nid_t pino = le32_to_cpu(raw_inode->i_pino);
+ struct f2fs_dir_entry *de;
+ struct qstr name;
+ struct page *page;
+ struct inode *dir, *einode;
+ int err = 0;
+
+ dir = f2fs_iget(inode->i_sb, pino);
+ if (IS_ERR(dir)) {
+ err = PTR_ERR(dir);
+ goto out;
+ }
+
+ name.len = le32_to_cpu(raw_inode->i_namelen);
+ name.name = raw_inode->i_name;
+
+ if (unlikely(name.len > F2FS_NAME_LEN)) {
+ WARN_ON(1);
+ err = -ENAMETOOLONG;
+ goto out_err;
+ }
+retry:
+ de = f2fs_find_entry(dir, &name, &page);
+ if (de && inode->i_ino == le32_to_cpu(de->ino)) {
+ clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ goto out_unmap_put;
+ }
+ if (de) {
+ einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
+ if (IS_ERR(einode)) {
+ WARN_ON(1);
+ err = PTR_ERR(einode);
+ if (err == -ENOENT)
+ err = -EEXIST;
+ goto out_unmap_put;
+ }
+ err = acquire_orphan_inode(F2FS_I_SB(inode));
+ if (err) {
+ iput(einode);
+ goto out_unmap_put;
+ }
+ f2fs_delete_entry(de, page, dir, einode);
+ iput(einode);
+ goto retry;
+ }
+ err = __f2fs_add_link(dir, &name, inode);
+ if (err)
+ goto out_err;
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
+ iput(dir);
+ } else {
+ add_dirty_dir_inode(dir);
+ set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
+ }
+
+ goto out;
+
+out_unmap_put:
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+out_err:
+ iput(dir);
+out:
+ f2fs_msg(inode->i_sb, KERN_NOTICE,
+ "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), raw_inode->i_name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
+ return err;
+}
+
+static void recover_inode(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode *raw = F2FS_INODE(page);
+
+ inode->i_mode = le16_to_cpu(raw->i_mode);
+ i_size_write(inode, le64_to_cpu(raw->i_size));
+ inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
+ inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
+ inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
+ inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+ inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
+ inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+
+ f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
+ ino_of_node(page), F2FS_INODE(page)->i_name);
+}
+
+static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
+{
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
+ struct curseg_info *curseg;
+ struct page *page = NULL;
+ block_t blkaddr;
+ int err = 0;
+
+ /* get node pages in the current segment */
+ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ ra_meta_pages(sbi, blkaddr, 1, META_POR);
+
+ while (1) {
+ struct fsync_inode_entry *entry;
+
+ if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
+ return 0;
+
+ page = get_meta_page(sbi, blkaddr);
+
+ if (cp_ver != cpver_of_node(page))
+ break;
+
+ if (!is_fsync_dnode(page))
+ goto next;
+
+ entry = get_fsync_inode(head, ino_of_node(page));
+ if (entry) {
+ if (IS_INODE(page) && is_dent_dnode(page))
+ set_inode_flag(F2FS_I(entry->inode),
+ FI_INC_LINK);
+ } else {
+ if (IS_INODE(page) && is_dent_dnode(page)) {
+ err = recover_inode_page(sbi, page);
+ if (err)
+ break;
+ }
+
+ /* add this fsync inode to the list */
+ entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
+ if (!entry) {
+ err = -ENOMEM;
+ break;
+ }
+ /*
+ * CP | dnode(F) | inode(DF)
+ * For this case, we should not give up now.
+ */
+ entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
+ if (IS_ERR(entry->inode)) {
+ err = PTR_ERR(entry->inode);
+ kmem_cache_free(fsync_entry_slab, entry);
+ if (err == -ENOENT)
+ goto next;
+ break;
+ }
+ list_add_tail(&entry->list, head);
+ }
+ entry->blkaddr = blkaddr;
+
+ if (IS_INODE(page)) {
+ entry->last_inode = blkaddr;
+ if (is_dent_dnode(page))
+ entry->last_dentry = blkaddr;
+ }
+next:
+ /* check next segment */
+ blkaddr = next_blkaddr_of_node(page);
+ f2fs_put_page(page, 1);
+
+ ra_meta_pages_cond(sbi, blkaddr);
+ }
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+static void destroy_fsync_dnodes(struct list_head *head)
+{
+ struct fsync_inode_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, head, list) {
+ iput(entry->inode);
+ list_del(&entry->list);
+ kmem_cache_free(fsync_entry_slab, entry);
+ }
+}
+
+static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
+ block_t blkaddr, struct dnode_of_data *dn)
+{
+ struct seg_entry *sentry;
+ unsigned int segno = GET_SEGNO(sbi, blkaddr);
+ unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ struct f2fs_summary_block *sum_node;
+ struct f2fs_summary sum;
+ struct page *sum_page, *node_page;
+ nid_t ino, nid;
+ struct inode *inode;
+ unsigned int offset;
+ block_t bidx;
+ int i;
+
+ sentry = get_seg_entry(sbi, segno);
+ if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
+ return 0;
+
+ /* Get the previous summary */
+ for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, i);
+ if (curseg->segno == segno) {
+ sum = curseg->sum_blk->entries[blkoff];
+ goto got_it;
+ }
+ }
+
+ sum_page = get_sum_page(sbi, segno);
+ sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+ sum = sum_node->entries[blkoff];
+ f2fs_put_page(sum_page, 1);
+got_it:
+ /* Use the locked dnode page and inode */
+ nid = le32_to_cpu(sum.nid);
+ if (dn->inode->i_ino == nid) {
+ struct dnode_of_data tdn = *dn;
+ tdn.nid = nid;
+ tdn.node_page = dn->inode_page;
+ tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ truncate_data_blocks_range(&tdn, 1);
+ return 0;
+ } else if (dn->nid == nid) {
+ struct dnode_of_data tdn = *dn;
+ tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
+ truncate_data_blocks_range(&tdn, 1);
+ return 0;
+ }
+
+ /* Get the node page */
+ node_page = get_node_page(sbi, nid);
+ if (IS_ERR(node_page))
+ return PTR_ERR(node_page);
+
+ offset = ofs_of_node(node_page);
+ ino = ino_of_node(node_page);
+ f2fs_put_page(node_page, 1);
+
+ if (ino != dn->inode->i_ino) {
+ /* Deallocate previous index in the node page */
+ inode = f2fs_iget(sbi->sb, ino);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ } else {
+ inode = dn->inode;
+ }
+
+ bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
+ le16_to_cpu(sum.ofs_in_node);
+
+ if (ino != dn->inode->i_ino) {
+ truncate_hole(inode, bidx, bidx + 1);
+ iput(inode);
+ } else {
+ struct dnode_of_data tdn;
+ set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
+ if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
+ return 0;
+ if (tdn.data_blkaddr != NULL_ADDR)
+ truncate_data_blocks_range(&tdn, 1);
+ f2fs_put_page(tdn.node_page, 1);
+ }
+ return 0;
+}
+
+static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct page *page, block_t blkaddr)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned int start, end;
+ struct dnode_of_data dn;
+ struct f2fs_summary sum;
+ struct node_info ni;
+ int err = 0, recovered = 0;
+
+ /* step 1: recover xattr */
+ if (IS_INODE(page)) {
+ recover_inline_xattr(inode, page);
+ } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
+ /*
+ * Deprecated; xattr blocks should be found from cold log.
+ * But, we should remain this for backward compatibility.
+ */
+ recover_xattr_data(inode, page, blkaddr);
+ goto out;
+ }
+
+ /* step 2: recover inline data */
+ if (recover_inline_data(inode, page))
+ goto out;
+
+ /* step 3: recover data indices */
+ start = start_bidx_of_node(ofs_of_node(page), fi);
+ end = start + ADDRS_PER_PAGE(page, fi);
+
+ f2fs_lock_op(sbi);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+
+ err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ if (err) {
+ f2fs_unlock_op(sbi);
+ goto out;
+ }
+
+ f2fs_wait_on_page_writeback(dn.node_page, NODE);
+
+ get_node_info(sbi, dn.nid, &ni);
+ f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
+ f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
+
+ for (; start < end; start++) {
+ block_t src, dest;
+
+ src = datablock_addr(dn.node_page, dn.ofs_in_node);
+ dest = datablock_addr(page, dn.ofs_in_node);
+
+ if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
+ if (src == NULL_ADDR) {
+ err = reserve_new_block(&dn);
+ /* We should not get -ENOSPC */
+ f2fs_bug_on(sbi, err);
+ }
+
+ /* Check the previous node page having this index */
+ err = check_index_in_prev_nodes(sbi, dest, &dn);
+ if (err)
+ goto err;
+
+ set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
+
+ /* write dummy data page */
+ recover_data_page(sbi, NULL, &sum, src, dest);
+ dn.data_blkaddr = dest;
+ update_extent_cache(&dn);
+ recovered++;
+ }
+ dn.ofs_in_node++;
+ }
+
+ /* write node page in place */
+ set_summary(&sum, dn.nid, 0, 0);
+ if (IS_INODE(dn.node_page))
+ sync_inode_page(&dn);
+
+ copy_node_footer(dn.node_page, page);
+ fill_node_footer(dn.node_page, dn.nid, ni.ino,
+ ofs_of_node(page), false);
+ set_page_dirty(dn.node_page);
+err:
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
+out:
+ f2fs_msg(sbi->sb, KERN_NOTICE,
+ "recover_data: ino = %lx, recovered = %d blocks, err = %d",
+ inode->i_ino, recovered, err);
+ return err;
+}
+
+static int recover_data(struct f2fs_sb_info *sbi,
+ struct list_head *head, int type)
+{
+ unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
+ struct curseg_info *curseg;
+ struct page *page = NULL;
+ int err = 0;
+ block_t blkaddr;
+
+ /* get node pages in the current segment */
+ curseg = CURSEG_I(sbi, type);
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ while (1) {
+ struct fsync_inode_entry *entry;
+
+ if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
+ break;
+
+ ra_meta_pages_cond(sbi, blkaddr);
+
+ page = get_meta_page(sbi, blkaddr);
+
+ if (cp_ver != cpver_of_node(page)) {
+ f2fs_put_page(page, 1);
+ break;
+ }
+
+ entry = get_fsync_inode(head, ino_of_node(page));
+ if (!entry)
+ goto next;
+ /*
+ * inode(x) | CP | inode(x) | dnode(F)
+ * In this case, we can lose the latest inode(x).
+ * So, call recover_inode for the inode update.
+ */
+ if (entry->last_inode == blkaddr)
+ recover_inode(entry->inode, page);
+ if (entry->last_dentry == blkaddr) {
+ err = recover_dentry(entry->inode, page);
+ if (err) {
+ f2fs_put_page(page, 1);
+ break;
+ }
+ }
+ err = do_recover_data(sbi, entry->inode, page, blkaddr);
+ if (err) {
+ f2fs_put_page(page, 1);
+ break;
+ }
+
+ if (entry->blkaddr == blkaddr) {
+ iput(entry->inode);
+ list_del(&entry->list);
+ kmem_cache_free(fsync_entry_slab, entry);
+ }
+next:
+ /* check next segment */
+ blkaddr = next_blkaddr_of_node(page);
+ f2fs_put_page(page, 1);
+ }
+ if (!err)
+ allocate_new_segments(sbi);
+ return err;
+}
+
+int recover_fsync_data(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ struct list_head inode_list;
+ block_t blkaddr;
+ int err;
+ bool need_writecp = false;
+
+ fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
+ sizeof(struct fsync_inode_entry));
+ if (!fsync_entry_slab)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&inode_list);
+
+ /* step #1: find fsynced inode numbers */
+ set_sbi_flag(sbi, SBI_POR_DOING);
+
+ /* prevent checkpoint */
+ mutex_lock(&sbi->cp_mutex);
+
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ err = find_fsync_dnodes(sbi, &inode_list);
+ if (err)
+ goto out;
+
+ if (list_empty(&inode_list))
+ goto out;
+
+ need_writecp = true;
+
+ /* step #2: recover data */
+ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+ if (!err)
+ f2fs_bug_on(sbi, !list_empty(&inode_list));
+out:
+ destroy_fsync_dnodes(&inode_list);
+ kmem_cache_destroy(fsync_entry_slab);
+
+ /* truncate meta pages to be used by the recovery */
+ truncate_inode_pages_range(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
+
+ if (err) {
+ truncate_inode_pages(NODE_MAPPING(sbi), 0);
+ truncate_inode_pages(META_MAPPING(sbi), 0);
+ }
+
+ clear_sbi_flag(sbi, SBI_POR_DOING);
+ if (err) {
+ discard_next_dnode(sbi, blkaddr);
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META))
+ sync_meta_pages(sbi, META, LONG_MAX);
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ mutex_unlock(&sbi->cp_mutex);
+ } else if (need_writecp) {
+ struct cp_control cpc = {
+ .reason = CP_SYNC,
+ };
+ mutex_unlock(&sbi->cp_mutex);
+ write_checkpoint(sbi, &cpc);
+ } else {
+ mutex_unlock(&sbi->cp_mutex);
+ }
+ return err;
+}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
new file mode 100644
index 0000000..0dc8ee8
--- /dev/null
+++ b/fs/f2fs/segment.c
@@ -0,0 +1,2386 @@
+/*
+ * fs/f2fs/segment.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/prefetch.h>
+#include <linux/kthread.h>
+#include <linux/vmalloc.h>
+#include <linux/swap.h>
+
+#include "f2fs.h"
+#include "segment.h"
+#include "node.h"
+#include "trace.h"
+#include <trace/events/f2fs.h>
+
+#define __reverse_ffz(x) __reverse_ffs(~(x))
+
+static struct kmem_cache *discard_entry_slab;
+static struct kmem_cache *sit_entry_set_slab;
+static struct kmem_cache *inmem_entry_slab;
+
+/**
+ * Copied from latest lib/llist.c
+ * llist_for_each_entry_safe - iterate over some deleted entries of
+ * lock-less list of given type
+ * safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @node: the first entry of deleted list entries.
+ * @member: the name of the llist_node with the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry. If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry_safe(pos, n, node, member) \
+ for (pos = llist_entry((node), typeof(*pos), member); \
+ &pos->member != NULL && \
+ (n = llist_entry(pos->member.next, typeof(*n), member), true); \
+ pos = n)
+
+/**
+ * Copied from latest lib/llist.c
+ * llist_reverse_order - reverse order of a llist chain
+ * @head: first item of the list to be reversed
+ *
+ * Reverse the order of a chain of llist entries and return the
+ * new first entry.
+ */
+struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+ struct llist_node *new_head = NULL;
+
+ while (head) {
+ struct llist_node *tmp = head;
+ head = head->next;
+ tmp->next = new_head;
+ new_head = tmp;
+ }
+
+ return new_head;
+}
+
+/**
+ * Copied from latest linux/list.h
+ * list_last_entry - get the last element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+/*
+ * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
+ * MSB and LSB are reversed in a byte by f2fs_set_bit.
+ */
+static inline unsigned long __reverse_ffs(unsigned long word)
+{
+ int num = 0;
+
+#if BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf0) == 0)
+ num += 4;
+ else
+ word >>= 4;
+ if ((word & 0xc) == 0)
+ num += 2;
+ else
+ word >>= 2;
+ if ((word & 0x2) == 0)
+ num += 1;
+ return num;
+}
+
+/*
+ * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
+ * f2fs_set_bit makes MSB and LSB reversed in a byte.
+ * Example:
+ * LSB <--> MSB
+ * f2fs_set_bit(0, bitmap) => 0000 0001
+ * f2fs_set_bit(7, bitmap) => 1000 0000
+ */
+static unsigned long __find_rev_next_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BIT_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+ unsigned long mask, submask;
+ unsigned long quot, rest;
+
+ if (offset >= size)
+ return size;
+
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (!offset)
+ goto aligned;
+
+ tmp = *(p++);
+ quot = (offset >> 3) << 3;
+ rest = offset & 0x7;
+ mask = ~0UL << quot;
+ submask = (unsigned char)(0xff << rest) >> rest;
+ submask <<= quot;
+ mask &= submask;
+ tmp &= mask;
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+aligned:
+ while (size & ~(BITS_PER_LONG-1)) {
+ tmp = *(p++);
+ if (tmp)
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __reverse_ffs(tmp);
+}
+
+static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BIT_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+ unsigned long mask, submask;
+ unsigned long quot, rest;
+
+ if (offset >= size)
+ return size;
+
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (!offset)
+ goto aligned;
+
+ tmp = *(p++);
+ quot = (offset >> 3) << 3;
+ rest = offset & 0x7;
+ mask = ~(~0UL << quot);
+ submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
+ submask <<= quot;
+ mask += submask;
+ tmp |= mask;
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+aligned:
+ while (size & ~(BITS_PER_LONG - 1)) {
+ tmp = *(p++);
+ if (~tmp)
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __reverse_ffz(tmp);
+}
+
+void register_inmem_page(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct inmem_pages *new;
+ int err;
+
+ SetPagePrivate(page);
+ f2fs_trace_pid(page);
+
+ new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
+
+ /* add atomic page indices to the list */
+ new->page = page;
+ INIT_LIST_HEAD(&new->list);
+retry:
+ /* increase reference count with clean state */
+ mutex_lock(&fi->inmem_lock);
+ err = radix_tree_insert(&fi->inmem_root, page->index, new);
+ if (err == -EEXIST) {
+ mutex_unlock(&fi->inmem_lock);
+ kmem_cache_free(inmem_entry_slab, new);
+ return;
+ } else if (err) {
+ mutex_unlock(&fi->inmem_lock);
+ goto retry;
+ }
+ get_page(page);
+ list_add_tail(&new->list, &fi->inmem_pages);
+ inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ mutex_unlock(&fi->inmem_lock);
+}
+
+void commit_inmem_pages(struct inode *inode, bool abort)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct inmem_pages *cur, *tmp;
+ bool submit_bio = false;
+ struct f2fs_io_info fio = {
+ .type = DATA,
+ .rw = WRITE_SYNC | REQ_PRIO,
+ };
+
+ /*
+ * The abort is true only when f2fs_evict_inode is called.
+ * Basically, the f2fs_evict_inode doesn't produce any data writes, so
+ * that we don't need to call f2fs_balance_fs.
+ * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
+ * inode becomes free by iget_locked in f2fs_iget.
+ */
+ if (!abort) {
+ f2fs_balance_fs(sbi);
+ f2fs_lock_op(sbi);
+ }
+
+ mutex_lock(&fi->inmem_lock);
+ list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
+ if (!abort) {
+ lock_page(cur->page);
+ if (cur->page->mapping == inode->i_mapping) {
+ f2fs_wait_on_page_writeback(cur->page, DATA);
+ if (clear_page_dirty_for_io(cur->page))
+ inode_dec_dirty_pages(inode);
+ do_write_data_page(cur->page, &fio);
+ submit_bio = true;
+ }
+ f2fs_put_page(cur->page, 1);
+ } else {
+ put_page(cur->page);
+ }
+ radix_tree_delete(&fi->inmem_root, cur->page->index);
+ list_del(&cur->list);
+ kmem_cache_free(inmem_entry_slab, cur);
+ dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ }
+ mutex_unlock(&fi->inmem_lock);
+
+ if (!abort) {
+ f2fs_unlock_op(sbi);
+ if (submit_bio)
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ }
+}
+
+/*
+ * This function balances dirty node and dentry pages.
+ * In addition, it controls garbage collection.
+ */
+void f2fs_balance_fs(struct f2fs_sb_info *sbi)
+{
+ /*
+ * We should do GC or end up with checkpoint, if there are so many dirty
+ * dir/node pages without enough free segments.
+ */
+ if (has_not_enough_free_secs(sbi, 0)) {
+ mutex_lock(&sbi->gc_mutex);
+ f2fs_gc(sbi);
+ }
+}
+
+void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
+{
+ /* check the # of cached NAT entries and prefree segments */
+ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
+ excess_prefree_segs(sbi) ||
+ !available_free_memory(sbi, INO_ENTRIES))
+ f2fs_sync_fs(sbi->sb, true);
+}
+
+struct __submit_bio_ret {
+ struct completion event;
+ int error;
+};
+
+static void __submit_bio_wait_endio(struct bio *bio, int error)
+{
+ struct __submit_bio_ret *ret = bio->bi_private;
+
+ ret->error = error;
+ complete(&ret->event);
+}
+
+static int __submit_bio_wait(int rw, struct bio *bio)
+{
+ struct __submit_bio_ret ret;
+
+ rw |= REQ_SYNC;
+ init_completion(&ret.event);
+ bio->bi_private = &ret;
+ bio->bi_end_io = __submit_bio_wait_endio;
+ submit_bio(rw, bio);
+ wait_for_completion(&ret.event);
+
+ return ret.error;
+}
+
+static int issue_flush_thread(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ wait_queue_head_t *q = &fcc->flush_wait_queue;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ if (!llist_empty(&fcc->issue_list)) {
+ struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct flush_cmd *cmd, *next;
+ int ret;
+
+ fcc->dispatch_list = llist_del_all(&fcc->issue_list);
+ fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ ret = __submit_bio_wait(WRITE_FLUSH, bio);
+
+ llist_for_each_entry_safe(cmd, next,
+ fcc->dispatch_list, llnode) {
+ cmd->ret = ret;
+ complete(&cmd->wait);
+ }
+ bio_put(bio);
+ fcc->dispatch_list = NULL;
+ }
+
+ wait_event_interruptible(*q,
+ kthread_should_stop() || !llist_empty(&fcc->issue_list));
+ goto repeat;
+}
+
+int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd cmd;
+
+ trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
+ test_opt(sbi, FLUSH_MERGE));
+
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
+
+ init_completion(&cmd.wait);
+
+ llist_add(&cmd.llnode, &fcc->issue_list);
+
+ if (!fcc->dispatch_list)
+ wake_up(&fcc->flush_wait_queue);
+
+ wait_for_completion(&cmd.wait);
+
+ return cmd.ret;
+}
+
+int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct flush_cmd_control *fcc;
+ int err = 0;
+
+ fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+ if (!fcc)
+ return -ENOMEM;
+ init_waitqueue_head(&fcc->flush_wait_queue);
+ init_llist_head(&fcc->issue_list);
+ SM_I(sbi)->cmd_control_info = fcc;
+ fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+ "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(fcc->f2fs_issue_flush)) {
+ err = PTR_ERR(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ SM_I(sbi)->cmd_control_info = NULL;
+ return err;
+ }
+
+ return err;
+}
+
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+
+ if (fcc && fcc->f2fs_issue_flush)
+ kthread_stop(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ SM_I(sbi)->cmd_control_info = NULL;
+}
+
+static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
+ enum dirty_type dirty_type)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+ /* need not be added */
+ if (IS_CURSEG(sbi, segno))
+ return;
+
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
+ dirty_i->nr_dirty[dirty_type]++;
+
+ if (dirty_type == DIRTY) {
+ struct seg_entry *sentry = get_seg_entry(sbi, segno);
+ enum dirty_type t = sentry->type;
+
+ if (unlikely(t >= DIRTY)) {
+ f2fs_bug_on(sbi, 1);
+ return;
+ }
+ if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]++;
+ }
+}
+
+static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
+ enum dirty_type dirty_type)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
+ dirty_i->nr_dirty[dirty_type]--;
+
+ if (dirty_type == DIRTY) {
+ struct seg_entry *sentry = get_seg_entry(sbi, segno);
+ enum dirty_type t = sentry->type;
+
+ if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
+ dirty_i->nr_dirty[t]--;
+
+ if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
+ clear_bit(GET_SECNO(sbi, segno),
+ dirty_i->victim_secmap);
+ }
+}
+
+/*
+ * Should not occur error such as -ENOMEM.
+ * Adding dirty entry into seglist is not critical operation.
+ * If a given segment is one of current working segments, it won't be added.
+ */
+static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned short valid_blocks;
+
+ if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
+ return;
+
+ mutex_lock(&dirty_i->seglist_lock);
+
+ valid_blocks = get_valid_blocks(sbi, segno, 0);
+
+ if (valid_blocks == 0) {
+ __locate_dirty_segment(sbi, segno, PRE);
+ __remove_dirty_segment(sbi, segno, DIRTY);
+ } else if (valid_blocks < sbi->blocks_per_seg) {
+ __locate_dirty_segment(sbi, segno, DIRTY);
+ } else {
+ /* Recovery routine with SSR needs this */
+ __remove_dirty_segment(sbi, segno, DIRTY);
+ }
+
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
+static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
+ block_t blkstart, block_t blklen)
+{
+ sector_t start = SECTOR_FROM_BLOCK(blkstart);
+ sector_t len = SECTOR_FROM_BLOCK(blklen);
+ trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+ return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+}
+
+void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ if (f2fs_issue_discard(sbi, blkaddr, 1)) {
+ struct page *page = grab_meta_page(sbi, blkaddr);
+ /* zero-filled page */
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
+}
+
+static void __add_discard_entry(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc, unsigned int start, unsigned int end)
+{
+ struct list_head *head = &SM_I(sbi)->discard_list;
+ struct discard_entry *new, *last;
+
+ if (!list_empty(head)) {
+ last = list_last_entry(head, struct discard_entry, list);
+ if (START_BLOCK(sbi, cpc->trim_start) + start ==
+ last->blkaddr + last->len) {
+ last->len += end - start;
+ goto done;
+ }
+ }
+
+ new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&new->list);
+ new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
+ new->len = end - start;
+ list_add_tail(&new->list, head);
+done:
+ SM_I(sbi)->nr_discards += end - start;
+ cpc->trimmed += end - start;
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ int max_blocks = sbi->blocks_per_seg;
+ struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long *dmap = SIT_I(sbi)->tmp_map;
+ unsigned int start = 0, end = -1;
+ bool force = (cpc->reason == CP_DISCARD);
+ int i;
+
+ if (!force && (!test_opt(sbi, DISCARD) ||
+ SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards))
+ return;
+
+ if (force && !se->valid_blocks) {
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ /*
+ * if this segment is registered in the prefree list, then
+ * we should skip adding a discard candidate, and let the
+ * checkpoint do that later.
+ */
+ mutex_lock(&dirty_i->seglist_lock);
+ if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
+ mutex_unlock(&dirty_i->seglist_lock);
+ cpc->trimmed += sbi->blocks_per_seg;
+ return;
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
+ return;
+ }
+
+ /* zero block will be discarded through the prefree list */
+ if (!se->valid_blocks || se->valid_blocks == max_blocks)
+ return;
+
+ /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
+ for (i = 0; i < entries; i++)
+ dmap[i] = force ? ~ckpt_map[i] :
+ (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+
+ while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
+ start = __find_rev_next_bit(dmap, max_blocks, end + 1);
+ if (start >= max_blocks)
+ break;
+
+ end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
+
+ if (end - start < cpc->trim_minlen)
+ continue;
+
+ __add_discard_entry(sbi, cpc, start, end);
+ }
+}
+
+void release_discard_addrs(struct f2fs_sb_info *sbi)
+{
+ struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct discard_entry *entry, *this;
+
+ /* drop caches */
+ list_for_each_entry_safe(entry, this, head, list) {
+ list_del(&entry->list);
+ kmem_cache_free(discard_entry_slab, entry);
+ }
+}
+
+/*
+ * Should call clear_prefree_segments after checkpoint is done.
+ */
+static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int segno;
+
+ mutex_lock(&dirty_i->seglist_lock);
+ for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
+ __set_test_and_free(sbi, segno);
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
+void clear_prefree_segments(struct f2fs_sb_info *sbi)
+{
+ struct list_head *head = &(SM_I(sbi)->discard_list);
+ struct discard_entry *entry, *this;
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
+ unsigned int start = 0, end = -1;
+
+ mutex_lock(&dirty_i->seglist_lock);
+
+ while (1) {
+ int i;
+ start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
+ if (start >= MAIN_SEGS(sbi))
+ break;
+ end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
+ start + 1);
+
+ for (i = start; i < end; i++)
+ clear_bit(i, prefree_map);
+
+ dirty_i->nr_dirty[PRE] -= end - start;
+
+ if (!test_opt(sbi, DISCARD))
+ continue;
+
+ f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
+ (end - start) << sbi->log_blocks_per_seg);
+ }
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ /* send small discards */
+ list_for_each_entry_safe(entry, this, head, list) {
+ f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
+ list_del(&entry->list);
+ SM_I(sbi)->nr_discards -= entry->len;
+ kmem_cache_free(discard_entry_slab, entry);
+ }
+}
+
+static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+
+ if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
+ sit_i->dirty_sentries++;
+ return false;
+ }
+
+ return true;
+}
+
+static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
+ unsigned int segno, int modified)
+{
+ struct seg_entry *se = get_seg_entry(sbi, segno);
+ se->type = type;
+ if (modified)
+ __mark_sit_entry_dirty(sbi, segno);
+}
+
+static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
+{
+ struct seg_entry *se;
+ unsigned int segno, offset;
+ long int new_vblocks;
+
+ segno = GET_SEGNO(sbi, blkaddr);
+
+ se = get_seg_entry(sbi, segno);
+ new_vblocks = se->valid_blocks + del;
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+
+ f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
+ (new_vblocks > sbi->blocks_per_seg)));
+
+ se->valid_blocks = new_vblocks;
+ se->mtime = get_mtime(sbi);
+ SIT_I(sbi)->max_mtime = se->mtime;
+
+ /* Update valid block bitmap */
+ if (del > 0) {
+ if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
+ f2fs_bug_on(sbi, 1);
+ } else {
+ if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
+ f2fs_bug_on(sbi, 1);
+ }
+ if (!f2fs_test_bit(offset, se->ckpt_valid_map))
+ se->ckpt_valid_blocks += del;
+
+ __mark_sit_entry_dirty(sbi, segno);
+
+ /* update total number of valid blocks to be written in ckpt area */
+ SIT_I(sbi)->written_valid_blocks += del;
+
+ if (sbi->segs_per_sec > 1)
+ get_sec_entry(sbi, segno)->valid_blocks += del;
+}
+
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
+{
+ update_sit_entry(sbi, new, 1);
+ if (GET_SEGNO(sbi, old) != NULL_SEGNO)
+ update_sit_entry(sbi, old, -1);
+
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
+}
+
+void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
+{
+ unsigned int segno = GET_SEGNO(sbi, addr);
+ struct sit_info *sit_i = SIT_I(sbi);
+
+ f2fs_bug_on(sbi, addr == NULL_ADDR);
+ if (addr == NEW_ADDR)
+ return;
+
+ /* add it into sit main buffer */
+ mutex_lock(&sit_i->sentry_lock);
+
+ update_sit_entry(sbi, addr, -1);
+
+ /* add it into dirty seglist */
+ locate_dirty_segment(sbi, segno);
+
+ mutex_unlock(&sit_i->sentry_lock);
+}
+
+/*
+ * This function should be resided under the curseg_mutex lock
+ */
+static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
+ struct f2fs_summary *sum)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ void *addr = curseg->sum_blk;
+ addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
+ memcpy(addr, sum, sizeof(struct f2fs_summary));
+}
+
+/*
+ * Calculate the number of current summary pages for writing
+ */
+int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
+{
+ int valid_sum_count = 0;
+ int i, sum_in_page;
+
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+ if (sbi->ckpt->alloc_type[i] == SSR)
+ valid_sum_count += sbi->blocks_per_seg;
+ else {
+ if (for_ra)
+ valid_sum_count += le16_to_cpu(
+ F2FS_CKPT(sbi)->cur_data_blkoff[i]);
+ else
+ valid_sum_count += curseg_blkoff(sbi, i);
+ }
+ }
+
+ sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
+ SUM_FOOTER_SIZE) / SUMMARY_SIZE;
+ if (valid_sum_count <= sum_in_page)
+ return 1;
+ else if ((valid_sum_count - sum_in_page) <=
+ (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
+ return 2;
+ return 3;
+}
+
+/*
+ * Caller should put this summary page
+ */
+struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+}
+
+static void write_sum_page(struct f2fs_sb_info *sbi,
+ struct f2fs_summary_block *sum_blk, block_t blk_addr)
+{
+ struct page *page = grab_meta_page(sbi, blk_addr);
+ void *kaddr = page_address(page);
+ memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+}
+
+static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno = curseg->segno + 1;
+ struct free_segmap_info *free_i = FREE_I(sbi);
+
+ if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
+ return !test_bit(segno, free_i->free_segmap);
+ return 0;
+}
+
+/*
+ * Find a new segment from the free segments bitmap to right order
+ * This function should be returned with success, otherwise BUG
+ */
+static void get_new_segment(struct f2fs_sb_info *sbi,
+ unsigned int *newseg, bool new_sec, int dir)
+{
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ unsigned int segno, secno, zoneno;
+ unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
+ unsigned int hint = *newseg / sbi->segs_per_sec;
+ unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
+ unsigned int left_start = hint;
+ bool init = true;
+ int go_left = 0;
+ int i;
+
+ spin_lock(&free_i->segmap_lock);
+
+ if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
+ segno = find_next_zero_bit(free_i->free_segmap,
+ MAIN_SEGS(sbi), *newseg + 1);
+ if (segno - *newseg < sbi->segs_per_sec -
+ (*newseg % sbi->segs_per_sec))
+ goto got_it;
+ }
+find_other_zone:
+ secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
+ if (secno >= MAIN_SECS(sbi)) {
+ if (dir == ALLOC_RIGHT) {
+ secno = find_next_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi), 0);
+ f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
+ } else {
+ go_left = 1;
+ left_start = hint - 1;
+ }
+ }
+ if (go_left == 0)
+ goto skip_left;
+
+ while (test_bit(left_start, free_i->free_secmap)) {
+ if (left_start > 0) {
+ left_start--;
+ continue;
+ }
+ left_start = find_next_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi), 0);
+ f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
+ break;
+ }
+ secno = left_start;
+skip_left:
+ hint = secno;
+ segno = secno * sbi->segs_per_sec;
+ zoneno = secno / sbi->secs_per_zone;
+
+ /* give up on finding another zone */
+ if (!init)
+ goto got_it;
+ if (sbi->secs_per_zone == 1)
+ goto got_it;
+ if (zoneno == old_zoneno)
+ goto got_it;
+ if (dir == ALLOC_LEFT) {
+ if (!go_left && zoneno + 1 >= total_zones)
+ goto got_it;
+ if (go_left && zoneno == 0)
+ goto got_it;
+ }
+ for (i = 0; i < NR_CURSEG_TYPE; i++)
+ if (CURSEG_I(sbi, i)->zone == zoneno)
+ break;
+
+ if (i < NR_CURSEG_TYPE) {
+ /* zone is in user, try another */
+ if (go_left)
+ hint = zoneno * sbi->secs_per_zone - 1;
+ else if (zoneno + 1 >= total_zones)
+ hint = 0;
+ else
+ hint = (zoneno + 1) * sbi->secs_per_zone;
+ init = false;
+ goto find_other_zone;
+ }
+got_it:
+ /* set it as dirty segment in free segmap */
+ f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
+ __set_inuse(sbi, segno);
+ *newseg = segno;
+ spin_unlock(&free_i->segmap_lock);
+}
+
+static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ struct summary_footer *sum_footer;
+
+ curseg->segno = curseg->next_segno;
+ curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
+ curseg->next_blkoff = 0;
+ curseg->next_segno = NULL_SEGNO;
+
+ sum_footer = &(curseg->sum_blk->footer);
+ memset(sum_footer, 0, sizeof(struct summary_footer));
+ if (IS_DATASEG(type))
+ SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
+ if (IS_NODESEG(type))
+ SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
+ __set_sit_entry_type(sbi, type, curseg->segno, modified);
+}
+
+/*
+ * Allocate a current working segment.
+ * This function always allocates a free segment in LFS manner.
+ */
+static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno = curseg->segno;
+ int dir = ALLOC_LEFT;
+
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, segno));
+ if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+ dir = ALLOC_RIGHT;
+
+ if (test_opt(sbi, NOHEAP))
+ dir = ALLOC_RIGHT;
+
+ get_new_segment(sbi, &segno, new_sec, dir);
+ curseg->next_segno = segno;
+ reset_curseg(sbi, type, 1);
+ curseg->alloc_type = LFS;
+}
+
+static void __next_free_blkoff(struct f2fs_sb_info *sbi,
+ struct curseg_info *seg, block_t start)
+{
+ struct seg_entry *se = get_seg_entry(sbi, seg->segno);
+ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
+ unsigned long *target_map = SIT_I(sbi)->tmp_map;
+ unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+ unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
+ int i, pos;
+
+ for (i = 0; i < entries; i++)
+ target_map[i] = ckpt_map[i] | cur_map[i];
+
+ pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+
+ seg->next_blkoff = pos;
+}
+
+/*
+ * If a segment is written by LFS manner, next block offset is just obtained
+ * by increasing the current block offset. However, if a segment is written by
+ * SSR manner, next block offset obtained by calling __next_free_blkoff
+ */
+static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
+ struct curseg_info *seg)
+{
+ if (seg->alloc_type == SSR)
+ __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
+ else
+ seg->next_blkoff++;
+}
+
+/*
+ * This function always allocates a used segment(from dirty seglist) by SSR
+ * manner, so it should recover the existing segment information of valid blocks
+ */
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int new_segno = curseg->next_segno;
+ struct f2fs_summary_block *sum_node;
+ struct page *sum_page;
+
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+ __set_test_and_inuse(sbi, new_segno);
+
+ mutex_lock(&dirty_i->seglist_lock);
+ __remove_dirty_segment(sbi, new_segno, PRE);
+ __remove_dirty_segment(sbi, new_segno, DIRTY);
+ mutex_unlock(&dirty_i->seglist_lock);
+
+ reset_curseg(sbi, type, 1);
+ curseg->alloc_type = SSR;
+ __next_free_blkoff(sbi, curseg, 0);
+
+ if (reuse) {
+ sum_page = get_sum_page(sbi, new_segno);
+ sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+ memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
+ f2fs_put_page(sum_page, 1);
+ }
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
+
+ if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
+ return v_ops->get_victim(sbi,
+ &(curseg)->next_segno, BG_GC, type, SSR);
+
+ /* For data segments, let's do SSR more intensively */
+ for (; type >= CURSEG_HOT_DATA; type--)
+ if (v_ops->get_victim(sbi, &(curseg)->next_segno,
+ BG_GC, type, SSR))
+ return 1;
+ return 0;
+}
+
+/*
+ * flush out current segment and replace it with new segment
+ * This function should be returned with success, otherwise BUG
+ */
+static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
+ int type, bool force)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ if (force)
+ new_curseg(sbi, type, true);
+ else if (type == CURSEG_WARM_NODE)
+ new_curseg(sbi, type, false);
+ else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+ new_curseg(sbi, type, false);
+ else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
+ change_curseg(sbi, type, true);
+ else
+ new_curseg(sbi, type, false);
+
+ stat_inc_seg_type(sbi, curseg);
+}
+
+static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int old_segno;
+
+ old_segno = curseg->segno;
+ SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+ locate_dirty_segment(sbi, old_segno);
+}
+
+void allocate_new_segments(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
+ __allocate_new_segments(sbi, i);
+}
+
+static const struct segment_allocation default_salloc_ops = {
+ .allocate_segment = allocate_segment_by_default,
+};
+
+int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
+{
+ __u64 start = F2FS_BYTES_TO_BLK(range->start);
+ __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
+ unsigned int start_segno, end_segno;
+ struct cp_control cpc;
+
+ if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
+ range->len < sbi->blocksize)
+ return -EINVAL;
+
+ cpc.trimmed = 0;
+ if (end <= MAIN_BLKADDR(sbi))
+ goto out;
+
+ /* start/end segment number in main_area */
+ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
+ end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
+ GET_SEGNO(sbi, end);
+ cpc.reason = CP_DISCARD;
+ cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen);
+
+ /* do checkpoint to issue discard commands safely */
+ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
+ cpc.trim_start = start_segno;
+ cpc.trim_end = min_t(unsigned int, rounddown(start_segno +
+ BATCHED_TRIM_SEGMENTS(sbi),
+ sbi->segs_per_sec) - 1, end_segno);
+
+ mutex_lock(&sbi->gc_mutex);
+ write_checkpoint(sbi, &cpc);
+ mutex_unlock(&sbi->gc_mutex);
+ }
+out:
+ range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
+ return 0;
+}
+
+static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ if (curseg->next_blkoff < sbi->blocks_per_seg)
+ return true;
+ return false;
+}
+
+static int __get_segment_type_2(struct page *page, enum page_type p_type)
+{
+ if (p_type == DATA)
+ return CURSEG_HOT_DATA;
+ else
+ return CURSEG_HOT_NODE;
+}
+
+static int __get_segment_type_4(struct page *page, enum page_type p_type)
+{
+ if (p_type == DATA) {
+ struct inode *inode = page->mapping->host;
+
+ if (S_ISDIR(inode->i_mode))
+ return CURSEG_HOT_DATA;
+ else
+ return CURSEG_COLD_DATA;
+ } else {
+ if (IS_DNODE(page) && is_cold_node(page))
+ return CURSEG_WARM_NODE;
+ else
+ return CURSEG_COLD_NODE;
+ }
+}
+
+static int __get_segment_type_6(struct page *page, enum page_type p_type)
+{
+ if (p_type == DATA) {
+ struct inode *inode = page->mapping->host;
+
+ if (S_ISDIR(inode->i_mode))
+ return CURSEG_HOT_DATA;
+ else if (is_cold_data(page) || file_is_cold(inode))
+ return CURSEG_COLD_DATA;
+ else
+ return CURSEG_WARM_DATA;
+ } else {
+ if (IS_DNODE(page))
+ return is_cold_node(page) ? CURSEG_WARM_NODE :
+ CURSEG_HOT_NODE;
+ else
+ return CURSEG_COLD_NODE;
+ }
+}
+
+static int __get_segment_type(struct page *page, enum page_type p_type)
+{
+ switch (F2FS_P_SB(page)->active_logs) {
+ case 2:
+ return __get_segment_type_2(page, p_type);
+ case 4:
+ return __get_segment_type_4(page, p_type);
+ }
+ /* NR_CURSEG_TYPE(6) logs by default */
+ f2fs_bug_on(F2FS_P_SB(page),
+ F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
+ return __get_segment_type_6(page, p_type);
+}
+
+void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+ block_t old_blkaddr, block_t *new_blkaddr,
+ struct f2fs_summary *sum, int type)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct curseg_info *curseg;
+ bool direct_io = (type == CURSEG_DIRECT_IO);
+
+ type = direct_io ? CURSEG_WARM_DATA : type;
+
+ curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+
+ /* direct_io'ed data is aligned to the segment for better performance */
+ if (direct_io && curseg->next_blkoff)
+ __allocate_new_segments(sbi, type);
+
+ *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ /*
+ * __add_sum_entry should be resided under the curseg_mutex
+ * because, this function updates a summary entry in the
+ * current summary block.
+ */
+ __add_sum_entry(sbi, type, sum);
+
+ mutex_lock(&sit_i->sentry_lock);
+ __refresh_next_blkoff(sbi, curseg);
+
+ stat_inc_block_count(sbi, curseg);
+
+ if (!__has_curseg_space(sbi, type))
+ sit_i->s_ops->allocate_segment(sbi, type, false);
+ /*
+ * SIT information should be updated before segment allocation,
+ * since SSR needs latest valid block information.
+ */
+ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+
+ mutex_unlock(&sit_i->sentry_lock);
+
+ if (page && IS_NODESEG(type))
+ fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
+
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
+ struct f2fs_summary *sum,
+ struct f2fs_io_info *fio)
+{
+ int type = __get_segment_type(page, fio->type);
+
+ allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type);
+
+ /* writeout dirty page into bdev */
+ f2fs_submit_page_mbio(sbi, page, fio);
+}
+
+void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = WRITE_SYNC | REQ_META | REQ_PRIO,
+ .blk_addr = page->index,
+ };
+
+ set_page_writeback(page);
+ f2fs_submit_page_mbio(sbi, page, &fio);
+}
+
+void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
+ unsigned int nid, struct f2fs_io_info *fio)
+{
+ struct f2fs_summary sum;
+ set_summary(&sum, nid, 0, 0);
+ do_write_page(sbi, page, &sum, fio);
+}
+
+void write_data_page(struct page *page, struct dnode_of_data *dn,
+ struct f2fs_io_info *fio)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+ struct f2fs_summary sum;
+ struct node_info ni;
+
+ f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
+ get_node_info(sbi, dn->nid, &ni);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+ do_write_page(sbi, page, &sum, fio);
+ dn->data_blkaddr = fio->blk_addr;
+}
+
+void rewrite_data_page(struct page *page, struct f2fs_io_info *fio)
+{
+ stat_inc_inplace_blocks(F2FS_P_SB(page));
+ f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio);
+}
+
+void recover_data_page(struct f2fs_sb_info *sbi,
+ struct page *page, struct f2fs_summary *sum,
+ block_t old_blkaddr, block_t new_blkaddr)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct curseg_info *curseg;
+ unsigned int segno, old_cursegno;
+ struct seg_entry *se;
+ int type;
+
+ segno = GET_SEGNO(sbi, new_blkaddr);
+ se = get_seg_entry(sbi, segno);
+ type = se->type;
+
+ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
+ if (old_blkaddr == NULL_ADDR)
+ type = CURSEG_COLD_DATA;
+ else
+ type = CURSEG_WARM_DATA;
+ }
+ curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ mutex_lock(&sit_i->sentry_lock);
+
+ old_cursegno = curseg->segno;
+
+ /* change the current segment */
+ if (segno != curseg->segno) {
+ curseg->next_segno = segno;
+ change_curseg(sbi, type, true);
+ }
+
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
+ __add_sum_entry(sbi, type, sum);
+
+ refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
+ locate_dirty_segment(sbi, old_cursegno);
+
+ mutex_unlock(&sit_i->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+static inline bool is_merged_page(struct f2fs_sb_info *sbi,
+ struct page *page, enum page_type type)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = &sbi->write_io[btype];
+ struct bio_vec *bvec;
+ int i;
+
+ down_read(&io->io_rwsem);
+ if (!io->bio)
+ goto out;
+
+ __bio_for_each_segment(bvec, io->bio, i, 0) {
+ if (page == bvec->bv_page) {
+ up_read(&io->io_rwsem);
+ return true;
+ }
+ }
+
+out:
+ up_read(&io->io_rwsem);
+ return false;
+}
+
+void f2fs_wait_on_page_writeback(struct page *page,
+ enum page_type type)
+{
+ if (PageWriteback(page)) {
+ struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+
+ if (is_merged_page(sbi, page, type))
+ f2fs_submit_merged_bio(sbi, type, WRITE);
+ wait_on_page_writeback(page);
+ }
+}
+
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *seg_i;
+ unsigned char *kaddr;
+ struct page *page;
+ block_t start;
+ int i, j, offset;
+
+ start = start_sum_block(sbi);
+
+ page = get_meta_page(sbi, start++);
+ kaddr = (unsigned char *)page_address(page);
+
+ /* Step 1: restore nat cache */
+ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
+
+ /* Step 2: restore sit cache */
+ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
+ memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
+ SUM_JOURNAL_SIZE);
+ offset = 2 * SUM_JOURNAL_SIZE;
+
+ /* Step 3: restore summary entries */
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+ unsigned short blk_off;
+ unsigned int segno;
+
+ seg_i = CURSEG_I(sbi, i);
+ segno = le32_to_cpu(ckpt->cur_data_segno[i]);
+ blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
+ seg_i->next_segno = segno;
+ reset_curseg(sbi, i, 0);
+ seg_i->alloc_type = ckpt->alloc_type[i];
+ seg_i->next_blkoff = blk_off;
+
+ if (seg_i->alloc_type == SSR)
+ blk_off = sbi->blocks_per_seg;
+
+ for (j = 0; j < blk_off; j++) {
+ struct f2fs_summary *s;
+ s = (struct f2fs_summary *)(kaddr + offset);
+ seg_i->sum_blk->entries[j] = *s;
+ offset += SUMMARY_SIZE;
+ if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+ SUM_FOOTER_SIZE)
+ continue;
+
+ f2fs_put_page(page, 1);
+ page = NULL;
+
+ page = get_meta_page(sbi, start++);
+ kaddr = (unsigned char *)page_address(page);
+ offset = 0;
+ }
+ }
+ f2fs_put_page(page, 1);
+ return 0;
+}
+
+static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
+{
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct f2fs_summary_block *sum;
+ struct curseg_info *curseg;
+ struct page *new;
+ unsigned short blk_off;
+ unsigned int segno = 0;
+ block_t blk_addr = 0;
+
+ /* get segment number and block addr */
+ if (IS_DATASEG(type)) {
+ segno = le32_to_cpu(ckpt->cur_data_segno[type]);
+ blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
+ CURSEG_HOT_DATA]);
+ if (__exist_node_summaries(sbi))
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+ else
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
+ } else {
+ segno = le32_to_cpu(ckpt->cur_node_segno[type -
+ CURSEG_HOT_NODE]);
+ blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
+ CURSEG_HOT_NODE]);
+ if (__exist_node_summaries(sbi))
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
+ type - CURSEG_HOT_NODE);
+ else
+ blk_addr = GET_SUM_BLOCK(sbi, segno);
+ }
+
+ new = get_meta_page(sbi, blk_addr);
+ sum = (struct f2fs_summary_block *)page_address(new);
+
+ if (IS_NODESEG(type)) {
+ if (__exist_node_summaries(sbi)) {
+ struct f2fs_summary *ns = &sum->entries[0];
+ int i;
+ for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
+ ns->version = 0;
+ ns->ofs_in_node = 0;
+ }
+ } else {
+ int err;
+
+ err = restore_node_summary(sbi, segno, sum);
+ if (err) {
+ f2fs_put_page(new, 1);
+ return err;
+ }
+ }
+ }
+
+ /* set uncompleted segment to curseg */
+ curseg = CURSEG_I(sbi, type);
+ mutex_lock(&curseg->curseg_mutex);
+ memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
+ curseg->next_segno = segno;
+ reset_curseg(sbi, type, 0);
+ curseg->alloc_type = ckpt->alloc_type[type];
+ curseg->next_blkoff = blk_off;
+ mutex_unlock(&curseg->curseg_mutex);
+ f2fs_put_page(new, 1);
+ return 0;
+}
+
+static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
+{
+ int type = CURSEG_HOT_DATA;
+ int err;
+
+ if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
+ int npages = npages_for_summary_flush(sbi, true);
+
+ if (npages >= 2)
+ ra_meta_pages(sbi, start_sum_block(sbi), npages,
+ META_CP);
+
+ /* restore for compacted data summary */
+ if (read_compacted_summaries(sbi))
+ return -EINVAL;
+ type = CURSEG_HOT_NODE;
+ }
+
+ if (__exist_node_summaries(sbi))
+ ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
+ NR_CURSEG_TYPE - type, META_CP);
+
+ for (; type <= CURSEG_COLD_NODE; type++) {
+ err = read_normal_summaries(sbi, type);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ struct page *page;
+ unsigned char *kaddr;
+ struct f2fs_summary *summary;
+ struct curseg_info *seg_i;
+ int written_size = 0;
+ int i, j;
+
+ page = grab_meta_page(sbi, blkaddr++);
+ kaddr = (unsigned char *)page_address(page);
+
+ /* Step 1: write nat cache */
+ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
+ memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
+ written_size += SUM_JOURNAL_SIZE;
+
+ /* Step 2: write sit cache */
+ seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
+ memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
+ SUM_JOURNAL_SIZE);
+ written_size += SUM_JOURNAL_SIZE;
+
+ /* Step 3: write summary entries */
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+ unsigned short blkoff;
+ seg_i = CURSEG_I(sbi, i);
+ if (sbi->ckpt->alloc_type[i] == SSR)
+ blkoff = sbi->blocks_per_seg;
+ else
+ blkoff = curseg_blkoff(sbi, i);
+
+ for (j = 0; j < blkoff; j++) {
+ if (!page) {
+ page = grab_meta_page(sbi, blkaddr++);
+ kaddr = (unsigned char *)page_address(page);
+ written_size = 0;
+ }
+ summary = (struct f2fs_summary *)(kaddr + written_size);
+ *summary = seg_i->sum_blk->entries[j];
+ written_size += SUMMARY_SIZE;
+
+ if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+ SUM_FOOTER_SIZE)
+ continue;
+
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ page = NULL;
+ }
+ }
+ if (page) {
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
+}
+
+static void write_normal_summaries(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ int i, end;
+ if (IS_DATASEG(type))
+ end = type + NR_CURSEG_DATA_TYPE;
+ else
+ end = type + NR_CURSEG_NODE_TYPE;
+
+ for (i = type; i < end; i++) {
+ struct curseg_info *sum = CURSEG_I(sbi, i);
+ mutex_lock(&sum->curseg_mutex);
+ write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
+ mutex_unlock(&sum->curseg_mutex);
+ }
+}
+
+void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+{
+ if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
+ write_compacted_summaries(sbi, start_blk);
+ else
+ write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
+}
+
+void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
+{
+ write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
+}
+
+int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
+ unsigned int val, int alloc)
+{
+ int i;
+
+ if (type == NAT_JOURNAL) {
+ for (i = 0; i < nats_in_cursum(sum); i++) {
+ if (le32_to_cpu(nid_in_journal(sum, i)) == val)
+ return i;
+ }
+ if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
+ return update_nats_in_cursum(sum, 1);
+ } else if (type == SIT_JOURNAL) {
+ for (i = 0; i < sits_in_cursum(sum); i++)
+ if (le32_to_cpu(segno_in_journal(sum, i)) == val)
+ return i;
+ if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
+ return update_sits_in_cursum(sum, 1);
+ }
+ return -1;
+}
+
+static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return get_meta_page(sbi, current_sit_addr(sbi, segno));
+}
+
+static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
+ unsigned int start)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct page *src_page, *dst_page;
+ pgoff_t src_off, dst_off;
+ void *src_addr, *dst_addr;
+
+ src_off = current_sit_addr(sbi, start);
+ dst_off = next_sit_addr(sbi, src_off);
+
+ /* get current sit block page without lock */
+ src_page = get_meta_page(sbi, src_off);
+ dst_page = grab_meta_page(sbi, dst_off);
+ f2fs_bug_on(sbi, PageDirty(src_page));
+
+ src_addr = page_address(src_page);
+ dst_addr = page_address(dst_page);
+ memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+
+ set_page_dirty(dst_page);
+ f2fs_put_page(src_page, 1);
+
+ set_to_next_sit(sit_i, start);
+
+ return dst_page;
+}
+
+static struct sit_entry_set *grab_sit_entry_set(void)
+{
+ struct sit_entry_set *ses =
+ f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
+
+ ses->entry_cnt = 0;
+ INIT_LIST_HEAD(&ses->set_list);
+ return ses;
+}
+
+static void release_sit_entry_set(struct sit_entry_set *ses)
+{
+ list_del(&ses->set_list);
+ kmem_cache_free(sit_entry_set_slab, ses);
+}
+
+static void adjust_sit_entry_set(struct sit_entry_set *ses,
+ struct list_head *head)
+{
+ struct sit_entry_set *next = ses;
+
+ if (list_is_last(&ses->set_list, head))
+ return;
+
+ list_for_each_entry_continue(next, head, set_list)
+ if (ses->entry_cnt <= next->entry_cnt)
+ break;
+
+ list_move_tail(&ses->set_list, &next->set_list);
+}
+
+static void add_sit_entry(unsigned int segno, struct list_head *head)
+{
+ struct sit_entry_set *ses;
+ unsigned int start_segno = START_SEGNO(segno);
+
+ list_for_each_entry(ses, head, set_list) {
+ if (ses->start_segno == start_segno) {
+ ses->entry_cnt++;
+ adjust_sit_entry_set(ses, head);
+ return;
+ }
+ }
+
+ ses = grab_sit_entry_set();
+
+ ses->start_segno = start_segno;
+ ses->entry_cnt++;
+ list_add(&ses->set_list, head);
+}
+
+static void add_sits_in_set(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_sm_info *sm_info = SM_I(sbi);
+ struct list_head *set_list = &sm_info->sit_entry_set;
+ unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
+ unsigned int segno;
+
+ for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
+ add_sit_entry(segno, set_list);
+}
+
+static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ int i;
+
+ for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
+ unsigned int segno;
+ bool dirtied;
+
+ segno = le32_to_cpu(segno_in_journal(sum, i));
+ dirtied = __mark_sit_entry_dirty(sbi, segno);
+
+ if (!dirtied)
+ add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
+ }
+ update_sits_in_cursum(sum, -sits_in_cursum(sum));
+}
+
+/*
+ * CP calls this function, which flushes SIT entries including sit_journal,
+ * and moves prefree segs to free segs.
+ */
+void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct sit_entry_set *ses, *tmp;
+ struct list_head *head = &SM_I(sbi)->sit_entry_set;
+ bool to_journal = true;
+ struct seg_entry *se;
+
+ mutex_lock(&curseg->curseg_mutex);
+ mutex_lock(&sit_i->sentry_lock);
+
+ /*
+ * add and account sit entries of dirty bitmap in sit entry
+ * set temporarily
+ */
+ add_sits_in_set(sbi);
+
+ /*
+ * if there are no enough space in journal to store dirty sit
+ * entries, remove all entries from journal and add and account
+ * them in sit entry set.
+ */
+ if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
+ remove_sits_in_journal(sbi);
+
+ if (!sit_i->dirty_sentries)
+ goto out;
+
+ /*
+ * there are two steps to flush sit entries:
+ * #1, flush sit entries to journal in current cold data summary block.
+ * #2, flush sit entries to sit page.
+ */
+ list_for_each_entry_safe(ses, tmp, head, set_list) {
+ struct page *page = NULL;
+ struct f2fs_sit_block *raw_sit = NULL;
+ unsigned int start_segno = ses->start_segno;
+ unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
+ (unsigned long)MAIN_SEGS(sbi));
+ unsigned int segno = start_segno;
+
+ if (to_journal &&
+ !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
+ to_journal = false;
+
+ if (!to_journal) {
+ page = get_next_sit_page(sbi, start_segno);
+ raw_sit = page_address(page);
+ }
+
+ /* flush dirty sit entries in region of current sit set */
+ for_each_set_bit_from(segno, bitmap, end) {
+ int offset, sit_offset;
+
+ se = get_seg_entry(sbi, segno);
+
+ /* add discard candidates */
+ if (cpc->reason != CP_DISCARD) {
+ cpc->trim_start = segno;
+ add_discard_addrs(sbi, cpc);
+ }
+
+ if (to_journal) {
+ offset = lookup_journal_in_cursum(sum,
+ SIT_JOURNAL, segno, 1);
+ f2fs_bug_on(sbi, offset < 0);
+ segno_in_journal(sum, offset) =
+ cpu_to_le32(segno);
+ seg_info_to_raw_sit(se,
+ &sit_in_journal(sum, offset));
+ } else {
+ sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
+ seg_info_to_raw_sit(se,
+ &raw_sit->entries[sit_offset]);
+ }
+
+ __clear_bit(segno, bitmap);
+ sit_i->dirty_sentries--;
+ ses->entry_cnt--;
+ }
+
+ if (!to_journal)
+ f2fs_put_page(page, 1);
+
+ f2fs_bug_on(sbi, ses->entry_cnt);
+ release_sit_entry_set(ses);
+ }
+
+ f2fs_bug_on(sbi, !list_empty(head));
+ f2fs_bug_on(sbi, sit_i->dirty_sentries);
+out:
+ if (cpc->reason == CP_DISCARD) {
+ for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
+ add_discard_addrs(sbi, cpc);
+ }
+ mutex_unlock(&sit_i->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ set_prefree_as_free_segments(sbi);
+}
+
+static int build_sit_info(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct sit_info *sit_i;
+ unsigned int sit_segs, start;
+ char *src_bitmap, *dst_bitmap;
+ unsigned int bitmap_size;
+
+ /* allocate memory for SIT information */
+ sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
+ if (!sit_i)
+ return -ENOMEM;
+
+ SM_I(sbi)->sit_info = sit_i;
+
+ sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
+ if (!sit_i->sentries)
+ return -ENOMEM;
+
+ bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
+ sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!sit_i->dirty_sentries_bitmap)
+ return -ENOMEM;
+
+ for (start = 0; start < MAIN_SEGS(sbi); start++) {
+ sit_i->sentries[start].cur_valid_map
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ sit_i->sentries[start].ckpt_valid_map
+ = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ if (!sit_i->sentries[start].cur_valid_map
+ || !sit_i->sentries[start].ckpt_valid_map)
+ return -ENOMEM;
+ }
+
+ sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
+ if (!sit_i->tmp_map)
+ return -ENOMEM;
+
+ if (sbi->segs_per_sec > 1) {
+ sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
+ sizeof(struct sec_entry));
+ if (!sit_i->sec_entries)
+ return -ENOMEM;
+ }
+
+ /* get information related with SIT */
+ sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
+
+ /* setup SIT bitmap from ckeckpoint pack */
+ bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
+ src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
+
+ dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
+ if (!dst_bitmap)
+ return -ENOMEM;
+
+ /* init SIT information */
+ sit_i->s_ops = &default_salloc_ops;
+
+ sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
+ sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
+ sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
+ sit_i->sit_bitmap = dst_bitmap;
+ sit_i->bitmap_size = bitmap_size;
+ sit_i->dirty_sentries = 0;
+ sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
+ sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
+ sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
+ mutex_init(&sit_i->sentry_lock);
+ return 0;
+}
+
+static int build_free_segmap(struct f2fs_sb_info *sbi)
+{
+ struct free_segmap_info *free_i;
+ unsigned int bitmap_size, sec_bitmap_size;
+
+ /* allocate memory for free segmap information */
+ free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
+ if (!free_i)
+ return -ENOMEM;
+
+ SM_I(sbi)->free_info = free_i;
+
+ bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
+ free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
+ if (!free_i->free_segmap)
+ return -ENOMEM;
+
+ sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
+ free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
+ if (!free_i->free_secmap)
+ return -ENOMEM;
+
+ /* set all segments as dirty temporarily */
+ memset(free_i->free_segmap, 0xff, bitmap_size);
+ memset(free_i->free_secmap, 0xff, sec_bitmap_size);
+
+ /* init free segmap information */
+ free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
+ free_i->free_segments = 0;
+ free_i->free_sections = 0;
+ spin_lock_init(&free_i->segmap_lock);
+ return 0;
+}
+
+static int build_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *array;
+ int i;
+
+ array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
+ if (!array)
+ return -ENOMEM;
+
+ SM_I(sbi)->curseg_array = array;
+
+ for (i = 0; i < NR_CURSEG_TYPE; i++) {
+ mutex_init(&array[i].curseg_mutex);
+ array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (!array[i].sum_blk)
+ return -ENOMEM;
+ array[i].segno = NULL_SEGNO;
+ array[i].next_blkoff = 0;
+ }
+ return restore_curseg_summaries(sbi);
+}
+
+static void build_sit_entries(struct f2fs_sb_info *sbi)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
+ struct f2fs_summary_block *sum = curseg->sum_blk;
+ int sit_blk_cnt = SIT_BLK_CNT(sbi);
+ unsigned int i, start, end;
+ unsigned int readed, start_blk = 0;
+ int nrpages = MAX_BIO_BLOCKS(sbi);
+
+ do {
+ readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
+
+ start = start_blk * sit_i->sents_per_block;
+ end = (start_blk + readed) * sit_i->sents_per_block;
+
+ for (; start < end && start < MAIN_SEGS(sbi); start++) {
+ struct seg_entry *se = &sit_i->sentries[start];
+ struct f2fs_sit_block *sit_blk;
+ struct f2fs_sit_entry sit;
+ struct page *page;
+
+ mutex_lock(&curseg->curseg_mutex);
+ for (i = 0; i < sits_in_cursum(sum); i++) {
+ if (le32_to_cpu(segno_in_journal(sum, i))
+ == start) {
+ sit = sit_in_journal(sum, i);
+ mutex_unlock(&curseg->curseg_mutex);
+ goto got_it;
+ }
+ }
+ mutex_unlock(&curseg->curseg_mutex);
+
+ page = get_current_sit_page(sbi, start);
+ sit_blk = (struct f2fs_sit_block *)page_address(page);
+ sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
+ f2fs_put_page(page, 1);
+got_it:
+ check_block_count(sbi, start, &sit);
+ seg_info_from_raw_sit(se, &sit);
+ if (sbi->segs_per_sec > 1) {
+ struct sec_entry *e = get_sec_entry(sbi, start);
+ e->valid_blocks += se->valid_blocks;
+ }
+ }
+ start_blk += readed;
+ } while (start_blk < sit_blk_cnt);
+}
+
+static void init_free_segmap(struct f2fs_sb_info *sbi)
+{
+ unsigned int start;
+ int type;
+
+ for (start = 0; start < MAIN_SEGS(sbi); start++) {
+ struct seg_entry *sentry = get_seg_entry(sbi, start);
+ if (!sentry->valid_blocks)
+ __set_free(sbi, start);
+ }
+
+ /* set use the current segments */
+ for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
+ struct curseg_info *curseg_t = CURSEG_I(sbi, type);
+ __set_test_and_inuse(sbi, curseg_t->segno);
+ }
+}
+
+static void init_dirty_segmap(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ unsigned int segno = 0, offset = 0;
+ unsigned short valid_blocks;
+
+ while (1) {
+ /* find dirty segment based on free segmap */
+ segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
+ if (segno >= MAIN_SEGS(sbi))
+ break;
+ offset = segno + 1;
+ valid_blocks = get_valid_blocks(sbi, segno, 0);
+ if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
+ continue;
+ if (valid_blocks > sbi->blocks_per_seg) {
+ f2fs_bug_on(sbi, 1);
+ continue;
+ }
+ mutex_lock(&dirty_i->seglist_lock);
+ __locate_dirty_segment(sbi, segno, DIRTY);
+ mutex_unlock(&dirty_i->seglist_lock);
+ }
+}
+
+static int init_victim_secmap(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
+
+ dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!dirty_i->victim_secmap)
+ return -ENOMEM;
+ return 0;
+}
+
+static int build_dirty_segmap(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i;
+ unsigned int bitmap_size, i;
+
+ /* allocate memory for dirty segments list information */
+ dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
+ if (!dirty_i)
+ return -ENOMEM;
+
+ SM_I(sbi)->dirty_info = dirty_i;
+ mutex_init(&dirty_i->seglist_lock);
+
+ bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
+
+ for (i = 0; i < NR_DIRTY_TYPE; i++) {
+ dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
+ if (!dirty_i->dirty_segmap[i])
+ return -ENOMEM;
+ }
+
+ init_dirty_segmap(sbi);
+ return init_victim_secmap(sbi);
+}
+
+/*
+ * Update min, max modified time for cost-benefit GC algorithm
+ */
+static void init_min_max_mtime(struct f2fs_sb_info *sbi)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int segno;
+
+ mutex_lock(&sit_i->sentry_lock);
+
+ sit_i->min_mtime = LLONG_MAX;
+
+ for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+ unsigned int i;
+ unsigned long long mtime = 0;
+
+ for (i = 0; i < sbi->segs_per_sec; i++)
+ mtime += get_seg_entry(sbi, segno + i)->mtime;
+
+ mtime = div_u64(mtime, sbi->segs_per_sec);
+
+ if (sit_i->min_mtime > mtime)
+ sit_i->min_mtime = mtime;
+ }
+ sit_i->max_mtime = get_mtime(sbi);
+ mutex_unlock(&sit_i->sentry_lock);
+}
+
+int build_segment_manager(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct f2fs_sm_info *sm_info;
+ int err;
+
+ sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
+ if (!sm_info)
+ return -ENOMEM;
+
+ /* init sm info */
+ sbi->sm_info = sm_info;
+ sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
+ sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
+ sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
+ sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+ sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+ sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
+ sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+ sm_info->rec_prefree_segments = sm_info->main_segments *
+ DEF_RECLAIM_PREFREE_SEGMENTS / 100;
+ sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
+ sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
+ sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+
+ INIT_LIST_HEAD(&sm_info->discard_list);
+ sm_info->nr_discards = 0;
+ sm_info->max_discards = 0;
+
+ sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
+
+ INIT_LIST_HEAD(&sm_info->sit_entry_set);
+
+ if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ return err;
+ }
+
+ err = build_sit_info(sbi);
+ if (err)
+ return err;
+ err = build_free_segmap(sbi);
+ if (err)
+ return err;
+ err = build_curseg(sbi);
+ if (err)
+ return err;
+
+ /* reinit free segmap based on SIT */
+ build_sit_entries(sbi);
+
+ init_free_segmap(sbi);
+ err = build_dirty_segmap(sbi);
+ if (err)
+ return err;
+
+ init_min_max_mtime(sbi);
+ return 0;
+}
+
+static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
+ enum dirty_type dirty_type)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
+ mutex_lock(&dirty_i->seglist_lock);
+ kfree(dirty_i->dirty_segmap[dirty_type]);
+ dirty_i->nr_dirty[dirty_type] = 0;
+ mutex_unlock(&dirty_i->seglist_lock);
+}
+
+static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ kfree(dirty_i->victim_secmap);
+}
+
+static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
+{
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ int i;
+
+ if (!dirty_i)
+ return;
+
+ /* discard pre-free/dirty segments list */
+ for (i = 0; i < NR_DIRTY_TYPE; i++)
+ discard_dirty_segmap(sbi, i);
+
+ destroy_victim_secmap(sbi);
+ SM_I(sbi)->dirty_info = NULL;
+ kfree(dirty_i);
+}
+
+static void destroy_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *array = SM_I(sbi)->curseg_array;
+ int i;
+
+ if (!array)
+ return;
+ SM_I(sbi)->curseg_array = NULL;
+ for (i = 0; i < NR_CURSEG_TYPE; i++)
+ kfree(array[i].sum_blk);
+ kfree(array);
+}
+
+static void destroy_free_segmap(struct f2fs_sb_info *sbi)
+{
+ struct free_segmap_info *free_i = SM_I(sbi)->free_info;
+ if (!free_i)
+ return;
+ SM_I(sbi)->free_info = NULL;
+ kfree(free_i->free_segmap);
+ kfree(free_i->free_secmap);
+ kfree(free_i);
+}
+
+static void destroy_sit_info(struct f2fs_sb_info *sbi)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int start;
+
+ if (!sit_i)
+ return;
+
+ if (sit_i->sentries) {
+ for (start = 0; start < MAIN_SEGS(sbi); start++) {
+ kfree(sit_i->sentries[start].cur_valid_map);
+ kfree(sit_i->sentries[start].ckpt_valid_map);
+ }
+ }
+ kfree(sit_i->tmp_map);
+
+ vfree(sit_i->sentries);
+ vfree(sit_i->sec_entries);
+ kfree(sit_i->dirty_sentries_bitmap);
+
+ SM_I(sbi)->sit_info = NULL;
+ kfree(sit_i->sit_bitmap);
+ kfree(sit_i);
+}
+
+void destroy_segment_manager(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_sm_info *sm_info = SM_I(sbi);
+
+ if (!sm_info)
+ return;
+ destroy_flush_cmd_control(sbi);
+ destroy_dirty_segmap(sbi);
+ destroy_curseg(sbi);
+ destroy_free_segmap(sbi);
+ destroy_sit_info(sbi);
+ sbi->sm_info = NULL;
+ kfree(sm_info);
+}
+
+int __init create_segment_manager_caches(void)
+{
+ discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
+ sizeof(struct discard_entry));
+ if (!discard_entry_slab)
+ goto fail;
+
+ sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
+ sizeof(struct sit_entry_set));
+ if (!sit_entry_set_slab)
+ goto destory_discard_entry;
+
+ inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
+ sizeof(struct inmem_pages));
+ if (!inmem_entry_slab)
+ goto destroy_sit_entry_set;
+ return 0;
+
+destroy_sit_entry_set:
+ kmem_cache_destroy(sit_entry_set_slab);
+destory_discard_entry:
+ kmem_cache_destroy(discard_entry_slab);
+fail:
+ return -ENOMEM;
+}
+
+void destroy_segment_manager_caches(void)
+{
+ kmem_cache_destroy(sit_entry_set_slab);
+ kmem_cache_destroy(discard_entry_slab);
+ kmem_cache_destroy(inmem_entry_slab);
+}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
new file mode 100644
index 0000000..7fd3511
--- /dev/null
+++ b/fs/f2fs/segment.h
@@ -0,0 +1,750 @@
+/*
+ * fs/f2fs/segment.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/blkdev.h>
+
+/* constant macro */
+#define NULL_SEGNO ((unsigned int)(~0))
+#define NULL_SECNO ((unsigned int)(~0))
+
+#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
+
+/* L: Logical segment # in volume, R: Relative segment # in main area */
+#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
+#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
+
+#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
+#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
+
+#define IS_CURSEG(sbi, seg) \
+ ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
+ (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+
+#define IS_CURSEC(sbi, secno) \
+ ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
+ sbi->segs_per_sec) || \
+ (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
+ sbi->segs_per_sec) || \
+ (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
+ sbi->segs_per_sec) || \
+ (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
+ sbi->segs_per_sec) || \
+ (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
+ sbi->segs_per_sec) || \
+ (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
+ sbi->segs_per_sec)) \
+
+#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
+#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
+
+#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
+#define MAIN_SECS(sbi) (sbi->total_sections)
+
+#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
+#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
+
+#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
+#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \
+ sbi->log_blocks_per_seg))
+
+#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
+ (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
+
+#define NEXT_FREE_BLKADDR(sbi, curseg) \
+ (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
+
+#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
+#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
+#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
+
+#define GET_SEGNO(sbi, blk_addr) \
+ (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
+ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
+ GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
+#define GET_SECNO(sbi, segno) \
+ ((segno) / sbi->segs_per_sec)
+#define GET_ZONENO_FROM_SEGNO(sbi, segno) \
+ ((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
+
+#define GET_SUM_BLOCK(sbi, segno) \
+ ((sbi->sm_info->ssa_blkaddr) + segno)
+
+#define GET_SUM_TYPE(footer) ((footer)->entry_type)
+#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
+
+#define SIT_ENTRY_OFFSET(sit_i, segno) \
+ (segno % sit_i->sents_per_block)
+#define SIT_BLOCK_OFFSET(segno) \
+ (segno / SIT_ENTRY_PER_BLOCK)
+#define START_SEGNO(segno) \
+ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
+#define SIT_BLK_CNT(sbi) \
+ ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
+#define f2fs_bitmap_size(nr) \
+ (BITS_TO_LONGS(nr) * sizeof(unsigned long))
+
+#define SECTOR_FROM_BLOCK(blk_addr) \
+ (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
+#define SECTOR_TO_BLOCK(sectors) \
+ (sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
+#define MAX_BIO_BLOCKS(sbi) \
+ ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
+
+/*
+ * indicate a block allocation direction: RIGHT and LEFT.
+ * RIGHT means allocating new sections towards the end of volume.
+ * LEFT means the opposite direction.
+ */
+enum {
+ ALLOC_RIGHT = 0,
+ ALLOC_LEFT
+};
+
+/*
+ * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
+ * LFS writes data sequentially with cleaning operations.
+ * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ */
+enum {
+ LFS = 0,
+ SSR
+};
+
+/*
+ * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
+ * GC_CB is based on cost-benefit algorithm.
+ * GC_GREEDY is based on greedy algorithm.
+ */
+enum {
+ GC_CB = 0,
+ GC_GREEDY
+};
+
+/*
+ * BG_GC means the background cleaning job.
+ * FG_GC means the on-demand cleaning job.
+ */
+enum {
+ BG_GC = 0,
+ FG_GC
+};
+
+/* for a function parameter to select a victim segment */
+struct victim_sel_policy {
+ int alloc_mode; /* LFS or SSR */
+ int gc_mode; /* GC_CB or GC_GREEDY */
+ unsigned long *dirty_segmap; /* dirty segment bitmap */
+ unsigned int max_search; /* maximum # of segments to search */
+ unsigned int offset; /* last scanned bitmap offset */
+ unsigned int ofs_unit; /* bitmap search unit */
+ unsigned int min_cost; /* minimum cost */
+ unsigned int min_segno; /* segment # having min. cost */
+};
+
+struct seg_entry {
+ unsigned short valid_blocks; /* # of valid blocks */
+ unsigned char *cur_valid_map; /* validity bitmap of blocks */
+ /*
+ * # of valid blocks and the validity bitmap stored in the the last
+ * checkpoint pack. This information is used by the SSR mode.
+ */
+ unsigned short ckpt_valid_blocks;
+ unsigned char *ckpt_valid_map;
+ unsigned char type; /* segment type like CURSEG_XXX_TYPE */
+ unsigned long long mtime; /* modification time of the segment */
+};
+
+struct sec_entry {
+ unsigned int valid_blocks; /* # of valid blocks in a section */
+};
+
+struct segment_allocation {
+ void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
+};
+
+struct inmem_pages {
+ struct list_head list;
+ struct page *page;
+};
+
+struct sit_info {
+ const struct segment_allocation *s_ops;
+
+ block_t sit_base_addr; /* start block address of SIT area */
+ block_t sit_blocks; /* # of blocks used by SIT area */
+ block_t written_valid_blocks; /* # of valid blocks in main area */
+ char *sit_bitmap; /* SIT bitmap pointer */
+ unsigned int bitmap_size; /* SIT bitmap size */
+
+ unsigned long *tmp_map; /* bitmap for temporal use */
+ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
+ unsigned int dirty_sentries; /* # of dirty sentries */
+ unsigned int sents_per_block; /* # of SIT entries per block */
+ struct mutex sentry_lock; /* to protect SIT cache */
+ struct seg_entry *sentries; /* SIT segment-level cache */
+ struct sec_entry *sec_entries; /* SIT section-level cache */
+
+ /* for cost-benefit algorithm in cleaning procedure */
+ unsigned long long elapsed_time; /* elapsed time after mount */
+ unsigned long long mounted_time; /* mount time */
+ unsigned long long min_mtime; /* min. modification time */
+ unsigned long long max_mtime; /* max. modification time */
+};
+
+struct free_segmap_info {
+ unsigned int start_segno; /* start segment number logically */
+ unsigned int free_segments; /* # of free segments */
+ unsigned int free_sections; /* # of free sections */
+ spinlock_t segmap_lock; /* free segmap lock */
+ unsigned long *free_segmap; /* free segment bitmap */
+ unsigned long *free_secmap; /* free section bitmap */
+};
+
+/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */
+enum dirty_type {
+ DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */
+ DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */
+ DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */
+ DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */
+ DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */
+ DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */
+ DIRTY, /* to count # of dirty segments */
+ PRE, /* to count # of entirely obsolete segments */
+ NR_DIRTY_TYPE
+};
+
+struct dirty_seglist_info {
+ const struct victim_selection *v_ops; /* victim selction operation */
+ unsigned long *dirty_segmap[NR_DIRTY_TYPE];
+ struct mutex seglist_lock; /* lock for segment bitmaps */
+ int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
+ unsigned long *victim_secmap; /* background GC victims */
+};
+
+/* victim selection function for cleaning and SSR */
+struct victim_selection {
+ int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
+ int, int, char);
+};
+
+/* for active log information */
+struct curseg_info {
+ struct mutex curseg_mutex; /* lock for consistency */
+ struct f2fs_summary_block *sum_blk; /* cached summary block */
+ unsigned char alloc_type; /* current allocation type */
+ unsigned int segno; /* current segment number */
+ unsigned short next_blkoff; /* next block offset to write */
+ unsigned int zone; /* current zone number */
+ unsigned int next_segno; /* preallocated segment */
+};
+
+struct sit_entry_set {
+ struct list_head set_list; /* link with all sit sets */
+ unsigned int start_segno; /* start segno of sits in set */
+ unsigned int entry_cnt; /* the # of sit entries in set */
+};
+
+/*
+ * inline functions
+ */
+static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
+{
+ return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
+}
+
+static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ return &sit_i->sentries[segno];
+}
+
+static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
+}
+
+static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
+ unsigned int segno, int section)
+{
+ /*
+ * In order to get # of valid blocks in a section instantly from many
+ * segments, f2fs manages two counting structures separately.
+ */
+ if (section > 1)
+ return get_sec_entry(sbi, segno)->valid_blocks;
+ else
+ return get_seg_entry(sbi, segno)->valid_blocks;
+}
+
+static inline void seg_info_from_raw_sit(struct seg_entry *se,
+ struct f2fs_sit_entry *rs)
+{
+ se->valid_blocks = GET_SIT_VBLOCKS(rs);
+ se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
+ memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+ memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+ se->type = GET_SIT_TYPE(rs);
+ se->mtime = le64_to_cpu(rs->mtime);
+}
+
+static inline void seg_info_to_raw_sit(struct seg_entry *se,
+ struct f2fs_sit_entry *rs)
+{
+ unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) |
+ se->valid_blocks;
+ rs->vblocks = cpu_to_le16(raw_vblocks);
+ memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
+ memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+ se->ckpt_valid_blocks = se->valid_blocks;
+ rs->mtime = cpu_to_le64(se->mtime);
+}
+
+static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
+ unsigned int max, unsigned int segno)
+{
+ unsigned int ret;
+ spin_lock(&free_i->segmap_lock);
+ ret = find_next_bit(free_i->free_segmap, max, segno);
+ spin_unlock(&free_i->segmap_lock);
+ return ret;
+}
+
+static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ unsigned int secno = segno / sbi->segs_per_sec;
+ unsigned int start_segno = secno * sbi->segs_per_sec;
+ unsigned int next;
+
+ spin_lock(&free_i->segmap_lock);
+ clear_bit(segno, free_i->free_segmap);
+ free_i->free_segments++;
+
+ next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno);
+ if (next >= start_segno + sbi->segs_per_sec) {
+ clear_bit(secno, free_i->free_secmap);
+ free_i->free_sections++;
+ }
+ spin_unlock(&free_i->segmap_lock);
+}
+
+static inline void __set_inuse(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ unsigned int secno = segno / sbi->segs_per_sec;
+ set_bit(segno, free_i->free_segmap);
+ free_i->free_segments--;
+ if (!test_and_set_bit(secno, free_i->free_secmap))
+ free_i->free_sections--;
+}
+
+static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ unsigned int secno = segno / sbi->segs_per_sec;
+ unsigned int start_segno = secno * sbi->segs_per_sec;
+ unsigned int next;
+
+ spin_lock(&free_i->segmap_lock);
+ if (test_and_clear_bit(segno, free_i->free_segmap)) {
+ free_i->free_segments++;
+
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + sbi->segs_per_sec, start_segno);
+ if (next >= start_segno + sbi->segs_per_sec) {
+ if (test_and_clear_bit(secno, free_i->free_secmap))
+ free_i->free_sections++;
+ }
+ }
+ spin_unlock(&free_i->segmap_lock);
+}
+
+static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ struct free_segmap_info *free_i = FREE_I(sbi);
+ unsigned int secno = segno / sbi->segs_per_sec;
+ spin_lock(&free_i->segmap_lock);
+ if (!test_and_set_bit(segno, free_i->free_segmap)) {
+ free_i->free_segments--;
+ if (!test_and_set_bit(secno, free_i->free_secmap))
+ free_i->free_sections--;
+ }
+ spin_unlock(&free_i->segmap_lock);
+}
+
+static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
+ void *dst_addr)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
+}
+
+static inline block_t written_block_count(struct f2fs_sb_info *sbi)
+{
+ return SIT_I(sbi)->written_valid_blocks;
+}
+
+static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
+{
+ return FREE_I(sbi)->free_segments;
+}
+
+static inline int reserved_segments(struct f2fs_sb_info *sbi)
+{
+ return SM_I(sbi)->reserved_segments;
+}
+
+static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
+{
+ return FREE_I(sbi)->free_sections;
+}
+
+static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
+{
+ return DIRTY_I(sbi)->nr_dirty[PRE];
+}
+
+static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
+{
+ return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] +
+ DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] +
+ DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] +
+ DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] +
+ DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] +
+ DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
+}
+
+static inline int overprovision_segments(struct f2fs_sb_info *sbi)
+{
+ return SM_I(sbi)->ovp_segments;
+}
+
+static inline int overprovision_sections(struct f2fs_sb_info *sbi)
+{
+ return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
+}
+
+static inline int reserved_sections(struct f2fs_sb_info *sbi)
+{
+ return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
+}
+
+static inline bool need_SSR(struct f2fs_sb_info *sbi)
+{
+ int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
+ int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+ return free_sections(sbi) <= (node_secs + 2 * dent_secs +
+ reserved_sections(sbi) + 1);
+}
+
+static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
+{
+ int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
+ int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ return false;
+
+ return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
+ reserved_sections(sbi));
+}
+
+static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
+{
+ return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
+}
+
+static inline int utilization(struct f2fs_sb_info *sbi)
+{
+ return div_u64((u64)valid_user_blocks(sbi) * 100,
+ sbi->user_block_count);
+}
+
+/*
+ * Sometimes f2fs may be better to drop out-of-place update policy.
+ * And, users can control the policy through sysfs entries.
+ * There are five policies with triggering conditions as follows.
+ * F2FS_IPU_FORCE - all the time,
+ * F2FS_IPU_SSR - if SSR mode is activated,
+ * F2FS_IPU_UTIL - if FS utilization is over threashold,
+ * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
+ * threashold,
+ * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash
+ * storages. IPU will be triggered only if the # of dirty
+ * pages over min_fsync_blocks.
+ * F2FS_IPUT_DISABLE - disable IPU. (=default option)
+ */
+#define DEF_MIN_IPU_UTIL 70
+#define DEF_MIN_FSYNC_BLOCKS 8
+
+enum {
+ F2FS_IPU_FORCE,
+ F2FS_IPU_SSR,
+ F2FS_IPU_UTIL,
+ F2FS_IPU_SSR_UTIL,
+ F2FS_IPU_FSYNC,
+};
+
+static inline bool need_inplace_update(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int policy = SM_I(sbi)->ipu_policy;
+
+ /* IPU can be done only for the user data */
+ if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
+ return false;
+
+ if (policy & (0x1 << F2FS_IPU_FORCE))
+ return true;
+ if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
+ return true;
+ if (policy & (0x1 << F2FS_IPU_UTIL) &&
+ utilization(sbi) > SM_I(sbi)->min_ipu_util)
+ return true;
+ if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
+ utilization(sbi) > SM_I(sbi)->min_ipu_util)
+ return true;
+
+ /* this is only set during fdatasync */
+ if (policy & (0x1 << F2FS_IPU_FSYNC) &&
+ is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
+ return true;
+
+ return false;
+}
+
+static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
+ int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ return curseg->segno;
+}
+
+static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi,
+ int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ return curseg->alloc_type;
+}
+
+static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ return curseg->next_blkoff;
+}
+
+#ifdef CONFIG_F2FS_CHECK_FS
+static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
+}
+
+static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
+{
+ BUG_ON(blk_addr < SEG0_BLKADDR(sbi));
+ BUG_ON(blk_addr >= MAX_BLKADDR(sbi));
+}
+
+/*
+ * Summary block is always treated as an invalid block
+ */
+static inline void check_block_count(struct f2fs_sb_info *sbi,
+ int segno, struct f2fs_sit_entry *raw_sit)
+{
+ bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
+ int valid_blocks = 0;
+ int cur_pos = 0, next_pos;
+
+ /* check segment usage */
+ BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
+
+ /* check boundary of a given segment number */
+ BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
+
+ /* check bitmap with valid block count */
+ do {
+ if (is_valid) {
+ next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ cur_pos);
+ valid_blocks += next_pos - cur_pos;
+ } else
+ next_pos = find_next_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ cur_pos);
+ cur_pos = next_pos;
+ is_valid = !is_valid;
+ } while (cur_pos < sbi->blocks_per_seg);
+ BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
+}
+#else
+static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ if (segno > TOTAL_SEGS(sbi) - 1)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+}
+
+static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
+{
+ if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+}
+
+/*
+ * Summary block is always treated as an invalid block
+ */
+static inline void check_block_count(struct f2fs_sb_info *sbi,
+ int segno, struct f2fs_sit_entry *raw_sit)
+{
+ /* check segment usage */
+ if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+ /* check boundary of a given segment number */
+ if (segno > TOTAL_SEGS(sbi) - 1)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+}
+#endif
+
+static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
+ unsigned int start)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ unsigned int offset = SIT_BLOCK_OFFSET(start);
+ block_t blk_addr = sit_i->sit_base_addr + offset;
+
+ check_seg_range(sbi, start);
+
+ /* calculate sit block address */
+ if (f2fs_test_bit(offset, sit_i->sit_bitmap))
+ blk_addr += sit_i->sit_blocks;
+
+ return blk_addr;
+}
+
+static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
+ pgoff_t block_addr)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ block_addr -= sit_i->sit_base_addr;
+ if (block_addr < sit_i->sit_blocks)
+ block_addr += sit_i->sit_blocks;
+ else
+ block_addr -= sit_i->sit_blocks;
+
+ return block_addr + sit_i->sit_base_addr;
+}
+
+static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
+{
+ unsigned int block_off = SIT_BLOCK_OFFSET(start);
+
+ f2fs_change_bit(block_off, sit_i->sit_bitmap);
+}
+
+static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
+ sit_i->mounted_time;
+}
+
+static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
+ unsigned int ofs_in_node, unsigned char version)
+{
+ sum->nid = cpu_to_le32(nid);
+ sum->ofs_in_node = cpu_to_le16(ofs_in_node);
+ sum->version = version;
+}
+
+static inline block_t start_sum_block(struct f2fs_sb_info *sbi)
+{
+ return __start_cp_addr(sbi) +
+ le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
+}
+
+static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
+{
+ return __start_cp_addr(sbi) +
+ le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count)
+ - (base + 1) + type;
+}
+
+static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
+{
+ if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
+ return true;
+ return false;
+}
+
+static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ return SECTOR_TO_BLOCK(queue_max_sectors(q));
+}
+
+/*
+ * It is very important to gather dirty pages and write at once, so that we can
+ * submit a big bio without interfering other data writes.
+ * By default, 512 pages for directory data,
+ * 512 pages (2MB) * 3 for three types of nodes, and
+ * max_bio_blocks for meta are set.
+ */
+static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
+{
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return 0;
+
+ if (type == DATA)
+ return sbi->blocks_per_seg;
+ else if (type == NODE)
+ return 3 * sbi->blocks_per_seg;
+ else if (type == META)
+ return MAX_BIO_BLOCKS(sbi);
+ else
+ return 0;
+}
+
+/*
+ * When writing pages, it'd better align nr_to_write for segment size.
+ */
+static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
+ struct writeback_control *wbc)
+{
+ long nr_to_write, desired;
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ return 0;
+
+ nr_to_write = wbc->nr_to_write;
+
+ if (type == DATA)
+ desired = 4096;
+ else if (type == NODE)
+ desired = 3 * max_hw_blocks(sbi);
+ else
+ desired = MAX_BIO_BLOCKS(sbi);
+
+ wbc->nr_to_write = desired;
+ return desired - nr_to_write;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
new file mode 100644
index 0000000..4ab9c76
--- /dev/null
+++ b/fs/f2fs/super.c
@@ -0,0 +1,1324 @@
+/*
+ * fs/f2fs/super.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/statfs.h>
+#include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
+#include <linux/kthread.h>
+#include <linux/parser.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/exportfs.h>
+#include <linux/blkdev.h>
+#include <linux/f2fs_fs.h>
+#include <linux/sysfs.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "xattr.h"
+#include "gc.h"
+#include "trace.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/f2fs.h>
+
+static struct proc_dir_entry *f2fs_proc_root;
+static struct kmem_cache *f2fs_inode_cachep;
+static struct kset *f2fs_kset;
+
+enum {
+ Opt_gc_background,
+ Opt_disable_roll_forward,
+ Opt_norecovery,
+ Opt_discard,
+ Opt_noheap,
+ Opt_user_xattr,
+ Opt_nouser_xattr,
+ Opt_acl,
+ Opt_noacl,
+ Opt_active_logs,
+ Opt_disable_ext_identify,
+ Opt_inline_xattr,
+ Opt_inline_data,
+ Opt_inline_dentry,
+ Opt_flush_merge,
+ Opt_nobarrier,
+ Opt_fastboot,
+ Opt_err,
+};
+
+static match_table_t f2fs_tokens = {
+ {Opt_gc_background, "background_gc=%s"},
+ {Opt_disable_roll_forward, "disable_roll_forward"},
+ {Opt_norecovery, "norecovery"},
+ {Opt_discard, "discard"},
+ {Opt_noheap, "no_heap"},
+ {Opt_user_xattr, "user_xattr"},
+ {Opt_nouser_xattr, "nouser_xattr"},
+ {Opt_acl, "acl"},
+ {Opt_noacl, "noacl"},
+ {Opt_active_logs, "active_logs=%u"},
+ {Opt_disable_ext_identify, "disable_ext_identify"},
+ {Opt_inline_xattr, "inline_xattr"},
+ {Opt_inline_data, "inline_data"},
+ {Opt_inline_dentry, "inline_dentry"},
+ {Opt_flush_merge, "flush_merge"},
+ {Opt_nobarrier, "nobarrier"},
+ {Opt_fastboot, "fastboot"},
+ {Opt_err, NULL},
+};
+
+/* Sysfs support for f2fs */
+enum {
+ GC_THREAD, /* struct f2fs_gc_thread */
+ SM_INFO, /* struct f2fs_sm_info */
+ NM_INFO, /* struct f2fs_nm_info */
+ F2FS_SBI, /* struct f2fs_sb_info */
+};
+
+struct f2fs_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
+ ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
+ const char *, size_t);
+ int struct_type;
+ int offset;
+};
+
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+ if (struct_type == GC_THREAD)
+ return (unsigned char *)sbi->gc_thread;
+ else if (struct_type == SM_INFO)
+ return (unsigned char *)SM_I(sbi);
+ else if (struct_type == NM_INFO)
+ return (unsigned char *)NM_I(sbi);
+ else if (struct_type == F2FS_SBI)
+ return (unsigned char *)sbi;
+ return NULL;
+}
+
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ unsigned char *ptr = NULL;
+ unsigned int *ui;
+
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
+ return -EINVAL;
+
+ ui = (unsigned int *)(ptr + a->offset);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ unsigned char *ptr;
+ unsigned long t;
+ unsigned int *ui;
+ ssize_t ret;
+
+ ptr = __struct_ptr(sbi, a->struct_type);
+ if (!ptr)
+ return -EINVAL;
+
+ ui = (unsigned int *)(ptr + a->offset);
+
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret < 0)
+ return ret;
+ *ui = t;
+ return count;
+}
+
+static ssize_t f2fs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->show ? a->show(a, sbi, buf) : 0;
+}
+
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+
+ return a->store ? a->store(a, sbi, buf, len) : 0;
+}
+
+static void f2fs_sb_release(struct kobject *kobj)
+{
+ struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
+ s_kobj);
+ complete(&sbi->s_kobj_unregister);
+}
+
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
+static struct f2fs_attr f2fs_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+ .show = _show, \
+ .store = _store, \
+ .struct_type = _struct_type, \
+ .offset = _offset \
+}
+
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
+ F2FS_ATTR_OFFSET(struct_type, name, 0644, \
+ f2fs_sbi_show, f2fs_sbi_store, \
+ offsetof(struct struct_name, elname))
+
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
+static struct attribute *f2fs_attrs[] = {
+ ATTR_LIST(gc_min_sleep_time),
+ ATTR_LIST(gc_max_sleep_time),
+ ATTR_LIST(gc_no_gc_sleep_time),
+ ATTR_LIST(gc_idle),
+ ATTR_LIST(reclaim_segments),
+ ATTR_LIST(max_small_discards),
+ ATTR_LIST(batched_trim_sections),
+ ATTR_LIST(ipu_policy),
+ ATTR_LIST(min_ipu_util),
+ ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(max_victim_search),
+ ATTR_LIST(dir_level),
+ ATTR_LIST(ram_thresh),
+ NULL,
+};
+
+static const struct sysfs_ops f2fs_attr_ops = {
+ .show = f2fs_attr_show,
+ .store = f2fs_attr_store,
+};
+
+static struct kobj_type f2fs_ktype = {
+ .default_attrs = f2fs_attrs,
+ .sysfs_ops = &f2fs_attr_ops,
+ .release = f2fs_sb_release,
+};
+
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ va_end(args);
+}
+
+static void init_once(void *foo)
+{
+ struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
+
+ inode_init_once(&fi->vfs_inode);
+}
+
+static int parse_options(struct super_block *sb, char *options)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ substring_t args[MAX_OPT_ARGS];
+ char *p, *name;
+ int arg = 0;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+ if (!*p)
+ continue;
+ /*
+ * Initialize args struct so we know whether arg was
+ * found; some options take optional arguments.
+ */
+ args[0].to = args[0].from = NULL;
+ token = match_token(p, f2fs_tokens, args);
+
+ switch (token) {
+ case Opt_gc_background:
+ name = match_strdup(&args[0]);
+
+ if (!name)
+ return -ENOMEM;
+ if (strlen(name) == 2 && !strncmp(name, "on", 2))
+ set_opt(sbi, BG_GC);
+ else if (strlen(name) == 3 && !strncmp(name, "off", 3))
+ clear_opt(sbi, BG_GC);
+ else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
+ case Opt_disable_roll_forward:
+ set_opt(sbi, DISABLE_ROLL_FORWARD);
+ break;
+ case Opt_norecovery:
+ /* this option mounts f2fs with ro */
+ set_opt(sbi, DISABLE_ROLL_FORWARD);
+ if (!f2fs_readonly(sb))
+ return -EINVAL;
+ break;
+ case Opt_discard:
+ set_opt(sbi, DISCARD);
+ break;
+ case Opt_noheap:
+ set_opt(sbi, NOHEAP);
+ break;
+#ifdef CONFIG_F2FS_FS_XATTR
+ case Opt_user_xattr:
+ set_opt(sbi, XATTR_USER);
+ break;
+ case Opt_nouser_xattr:
+ clear_opt(sbi, XATTR_USER);
+ break;
+ case Opt_inline_xattr:
+ set_opt(sbi, INLINE_XATTR);
+ break;
+#else
+ case Opt_user_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "user_xattr options not supported");
+ break;
+ case Opt_nouser_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "nouser_xattr options not supported");
+ break;
+ case Opt_inline_xattr:
+ f2fs_msg(sb, KERN_INFO,
+ "inline_xattr options not supported");
+ break;
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ case Opt_acl:
+ set_opt(sbi, POSIX_ACL);
+ break;
+ case Opt_noacl:
+ clear_opt(sbi, POSIX_ACL);
+ break;
+#else
+ case Opt_acl:
+ f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ break;
+ case Opt_noacl:
+ f2fs_msg(sb, KERN_INFO, "noacl options not supported");
+ break;
+#endif
+ case Opt_active_logs:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
+ return -EINVAL;
+ sbi->active_logs = arg;
+ break;
+ case Opt_disable_ext_identify:
+ set_opt(sbi, DISABLE_EXT_IDENTIFY);
+ break;
+ case Opt_inline_data:
+ set_opt(sbi, INLINE_DATA);
+ break;
+ case Opt_inline_dentry:
+ set_opt(sbi, INLINE_DENTRY);
+ break;
+ case Opt_flush_merge:
+ set_opt(sbi, FLUSH_MERGE);
+ break;
+ case Opt_nobarrier:
+ set_opt(sbi, NOBARRIER);
+ break;
+ case Opt_fastboot:
+ set_opt(sbi, FASTBOOT);
+ break;
+ default:
+ f2fs_msg(sb, KERN_ERR,
+ "Unrecognized mount option \"%s\" or missing value",
+ p);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static struct inode *f2fs_alloc_inode(struct super_block *sb)
+{
+ struct f2fs_inode_info *fi;
+
+ fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
+ if (!fi)
+ return NULL;
+
+ init_once((void *) fi);
+
+ /* Initialize f2fs-specific inode info */
+ fi->vfs_inode.i_version = 1;
+ atomic_set(&fi->dirty_pages, 0);
+ fi->i_current_depth = 1;
+ fi->i_advise = 0;
+ rwlock_init(&fi->ext.ext_lock);
+ init_rwsem(&fi->i_sem);
+ INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
+ INIT_LIST_HEAD(&fi->inmem_pages);
+ mutex_init(&fi->inmem_lock);
+
+ set_inode_flag(fi, FI_NEW_INODE);
+
+ if (test_opt(F2FS_SB(sb), INLINE_XATTR))
+ set_inode_flag(fi, FI_INLINE_XATTR);
+
+ /* Will be used by directory only */
+ fi->i_dir_level = F2FS_SB(sb)->dir_level;
+
+ return &fi->vfs_inode;
+}
+
+static int f2fs_drop_inode(struct inode *inode)
+{
+ /*
+ * This is to avoid a deadlock condition like below.
+ * writeback_single_inode(inode)
+ * - f2fs_write_data_page
+ * - f2fs_gc -> iput -> evict
+ * - inode_wait_for_writeback(inode)
+ */
+ if (!inode_unhashed(inode) && inode->i_state & I_SYNC)
+ return 0;
+ return generic_drop_inode(inode);
+}
+
+/*
+ * f2fs_dirty_inode() is called from __mark_inode_dirty()
+ *
+ * We should call set_dirty_inode to write the dirty inode through write_inode.
+ */
+static void f2fs_dirty_inode(struct inode *inode, int flags)
+{
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+}
+
+static void f2fs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
+}
+
+static void f2fs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, f2fs_i_callback);
+}
+
+static void f2fs_put_super(struct super_block *sb)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ kobject_del(&sbi->s_kobj);
+
+ f2fs_destroy_stats(sbi);
+ stop_gc_thread(sbi);
+
+ /*
+ * We don't need to do checkpoint when superblock is clean.
+ * But, the previous checkpoint was not done by umount, it needs to do
+ * clean checkpoint again.
+ */
+ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) {
+ struct cp_control cpc = {
+ .reason = CP_UMOUNT,
+ };
+ write_checkpoint(sbi, &cpc);
+ }
+
+ /*
+ * normally superblock is clean, so we need to release this.
+ * In addition, EIO will skip do checkpoint, we need this as well.
+ */
+ release_dirty_inode(sbi);
+ release_discard_addrs(sbi);
+
+ iput(sbi->node_inode);
+ iput(sbi->meta_inode);
+
+ /* destroy f2fs internal modules */
+ destroy_node_manager(sbi);
+ destroy_segment_manager(sbi);
+
+ kfree(sbi->ckpt);
+ kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
+
+ sb->s_fs_info = NULL;
+ brelse(sbi->raw_super_buf);
+ kfree(sbi);
+}
+
+int f2fs_sync_fs(struct super_block *sb, int sync)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ trace_f2fs_sync_fs(sb, sync);
+
+ if (sync) {
+ struct cp_control cpc;
+
+ cpc.reason = __get_cp_reason(sbi);
+
+ mutex_lock(&sbi->gc_mutex);
+ write_checkpoint(sbi, &cpc);
+ mutex_unlock(&sbi->gc_mutex);
+ } else {
+ f2fs_balance_fs(sbi);
+ }
+ f2fs_trace_ios(NULL, NULL, 1);
+
+ return 0;
+}
+
+static int f2fs_freeze(struct super_block *sb)
+{
+ int err;
+
+ if (f2fs_readonly(sb))
+ return 0;
+
+ err = f2fs_sync_fs(sb, 1);
+ return err;
+}
+
+static int f2fs_unfreeze(struct super_block *sb)
+{
+ return 0;
+}
+
+static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ block_t total_count, user_block_count, start_count, ovp_count;
+
+ total_count = le64_to_cpu(sbi->raw_super->block_count);
+ user_block_count = sbi->user_block_count;
+ start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
+ ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+ buf->f_type = F2FS_SUPER_MAGIC;
+ buf->f_bsize = sbi->blocksize;
+
+ buf->f_blocks = total_count - start_count;
+ buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
+ buf->f_bavail = user_block_count - valid_user_blocks(sbi);
+
+ buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+ buf->f_ffree = buf->f_files - valid_inode_count(sbi);
+
+ buf->f_namelen = F2FS_NAME_LEN;
+ buf->f_fsid.val[0] = (u32)id;
+ buf->f_fsid.val[1] = (u32)(id >> 32);
+
+ return 0;
+}
+
+static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
+
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
+ seq_printf(seq, ",background_gc=%s", "on");
+ else
+ seq_printf(seq, ",background_gc=%s", "off");
+ if (test_opt(sbi, DISABLE_ROLL_FORWARD))
+ seq_puts(seq, ",disable_roll_forward");
+ if (test_opt(sbi, DISCARD))
+ seq_puts(seq, ",discard");
+ if (test_opt(sbi, NOHEAP))
+ seq_puts(seq, ",no_heap_alloc");
+#ifdef CONFIG_F2FS_FS_XATTR
+ if (test_opt(sbi, XATTR_USER))
+ seq_puts(seq, ",user_xattr");
+ else
+ seq_puts(seq, ",nouser_xattr");
+ if (test_opt(sbi, INLINE_XATTR))
+ seq_puts(seq, ",inline_xattr");
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ if (test_opt(sbi, POSIX_ACL))
+ seq_puts(seq, ",acl");
+ else
+ seq_puts(seq, ",noacl");
+#endif
+ if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
+ seq_puts(seq, ",disable_ext_identify");
+ if (test_opt(sbi, INLINE_DATA))
+ seq_puts(seq, ",inline_data");
+ if (test_opt(sbi, INLINE_DENTRY))
+ seq_puts(seq, ",inline_dentry");
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
+ seq_puts(seq, ",flush_merge");
+ if (test_opt(sbi, NOBARRIER))
+ seq_puts(seq, ",nobarrier");
+ if (test_opt(sbi, FASTBOOT))
+ seq_puts(seq, ",fastboot");
+ seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+
+ return 0;
+}
+
+static int segment_info_seq_show(struct seq_file *seq, void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ unsigned int total_segs =
+ le32_to_cpu(sbi->raw_super->segment_count_main);
+ int i;
+
+ seq_puts(seq, "format: segment_type|valid_blocks\n"
+ "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+ for (i = 0; i < total_segs; i++) {
+ struct seg_entry *se = get_seg_entry(sbi, i);
+
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-5d", i);
+ seq_printf(seq, "%d|%-3u", se->type,
+ get_valid_blocks(sbi, i, 1));
+ if ((i % 10) == 9 || i == (total_segs - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+
+ return 0;
+}
+
+static int segment_info_open_fs(struct inode *inode, struct file *file)
+{
+ return single_open(file, segment_info_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations f2fs_seq_segment_info_fops = {
+ .owner = THIS_MODULE,
+ .open = segment_info_open_fs,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int f2fs_remount(struct super_block *sb, int *flags, char *data)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_mount_info org_mount_opt;
+ int err, active_logs;
+ bool need_restart_gc = false;
+ bool need_stop_gc = false;
+
+ sync_filesystem(sb);
+
+ /*
+ * Save the old mount options in case we
+ * need to restore them.
+ */
+ org_mount_opt = sbi->mount_opt;
+ active_logs = sbi->active_logs;
+
+ sbi->mount_opt.opt = 0;
+ sbi->active_logs = NR_CURSEG_TYPE;
+
+ /* parse mount options */
+ err = parse_options(sb, data);
+ if (err)
+ goto restore_opts;
+
+ /*
+ * Previous and new state of filesystem is RO,
+ * so skip checking GC and FLUSH_MERGE conditions.
+ */
+ if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
+ goto skip;
+
+ /*
+ * We stop the GC thread if FS is mounted as RO
+ * or if background_gc = off is passed in mount
+ * option. Also sync the filesystem.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
+ if (sbi->gc_thread) {
+ stop_gc_thread(sbi);
+ f2fs_sync_fs(sb, 1);
+ need_restart_gc = true;
+ }
+ } else if (!sbi->gc_thread) {
+ err = start_gc_thread(sbi);
+ if (err)
+ goto restore_opts;
+ need_stop_gc = true;
+ }
+
+ /*
+ * We stop issue flush thread if FS is mounted as RO
+ * or if flush_merge is not passed in mount option.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ destroy_flush_cmd_control(sbi);
+ } else if (!SM_I(sbi)->cmd_control_info) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ goto restore_gc;
+ }
+skip:
+ /* Update the POSIXACL Flag */
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+ return 0;
+restore_gc:
+ if (need_restart_gc) {
+ if (start_gc_thread(sbi))
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "background gc thread has stopped");
+ } else if (need_stop_gc) {
+ stop_gc_thread(sbi);
+ }
+restore_opts:
+ sbi->mount_opt = org_mount_opt;
+ sbi->active_logs = active_logs;
+ return err;
+}
+
+static struct super_operations f2fs_sops = {
+ .alloc_inode = f2fs_alloc_inode,
+ .drop_inode = f2fs_drop_inode,
+ .destroy_inode = f2fs_destroy_inode,
+ .write_inode = f2fs_write_inode,
+ .dirty_inode = f2fs_dirty_inode,
+ .show_options = f2fs_show_options,
+ .evict_inode = f2fs_evict_inode,
+ .put_super = f2fs_put_super,
+ .sync_fs = f2fs_sync_fs,
+ .freeze_fs = f2fs_freeze,
+ .unfreeze_fs = f2fs_unfreeze,
+ .statfs = f2fs_statfs,
+ .remount_fs = f2fs_remount,
+};
+
+static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
+ u64 ino, u32 generation)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct inode *inode;
+
+ if (check_nid_range(sbi, ino))
+ return ERR_PTR(-ESTALE);
+
+ /*
+ * f2fs_iget isn't quite right if the inode is currently unallocated!
+ * However f2fs_iget currently does appropriate checks to handle stale
+ * inodes so everything is OK.
+ */
+ inode = f2fs_iget(sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ if (unlikely(generation && inode->i_generation != generation)) {
+ /* we didn't find the right inode.. */
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ return inode;
+}
+
+static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ f2fs_nfs_get_inode);
+}
+
+static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ f2fs_nfs_get_inode);
+}
+
+static const struct export_operations f2fs_export_ops = {
+ .fh_to_dentry = f2fs_fh_to_dentry,
+ .fh_to_parent = f2fs_fh_to_parent,
+ .get_parent = f2fs_get_parent,
+};
+
+static loff_t max_file_size(unsigned bits)
+{
+ loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
+ loff_t leaf_count = ADDRS_PER_BLOCK;
+
+ /* two direct node blocks */
+ result += (leaf_count * 2);
+
+ /* two indirect node blocks */
+ leaf_count *= NIDS_PER_BLOCK;
+ result += (leaf_count * 2);
+
+ /* one double indirect node block */
+ leaf_count *= NIDS_PER_BLOCK;
+ result += leaf_count;
+
+ result <<= bits;
+ return result;
+}
+
+static int sanity_check_raw_super(struct super_block *sb,
+ struct f2fs_super_block *raw_super)
+{
+ unsigned int blocksize;
+
+ if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Magic Mismatch, valid(0x%x) - read(0x%x)",
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
+ return 1;
+ }
+
+ /* Currently, support only 4KB page cache size */
+ if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid page_cache_size (%lu), supports only 4KB\n",
+ PAGE_CACHE_SIZE);
+ return 1;
+ }
+
+ /* Currently, support only 4KB block size */
+ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
+ if (blocksize != F2FS_BLKSIZE) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid blocksize (%u), supports only 4KB\n",
+ blocksize);
+ return 1;
+ }
+
+ /* Currently, support 512/1024/2048/4096 bytes sector size */
+ if (le32_to_cpu(raw_super->log_sectorsize) >
+ F2FS_MAX_LOG_SECTOR_SIZE ||
+ le32_to_cpu(raw_super->log_sectorsize) <
+ F2FS_MIN_LOG_SECTOR_SIZE) {
+ f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
+ le32_to_cpu(raw_super->log_sectorsize));
+ return 1;
+ }
+ if (le32_to_cpu(raw_super->log_sectors_per_block) +
+ le32_to_cpu(raw_super->log_sectorsize) !=
+ F2FS_MAX_LOG_SECTOR_SIZE) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid log sectors per block(%u) log sectorsize(%u)",
+ le32_to_cpu(raw_super->log_sectors_per_block),
+ le32_to_cpu(raw_super->log_sectorsize));
+ return 1;
+ }
+ return 0;
+}
+
+static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+{
+ unsigned int total, fsmeta;
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+
+ total = le32_to_cpu(raw_super->segment_count);
+ fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
+ fsmeta += le32_to_cpu(raw_super->segment_count_sit);
+ fsmeta += le32_to_cpu(raw_super->segment_count_nat);
+ fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
+ fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
+
+ if (unlikely(fsmeta >= total))
+ return 1;
+
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+ return 1;
+ }
+ return 0;
+}
+
+static void init_sb_info(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_super_block *raw_super = sbi->raw_super;
+ int i;
+
+ sbi->log_sectors_per_block =
+ le32_to_cpu(raw_super->log_sectors_per_block);
+ sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
+ sbi->blocksize = 1 << sbi->log_blocksize;
+ sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
+ sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
+ sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
+ sbi->total_sections = le32_to_cpu(raw_super->section_count);
+ sbi->total_node_count =
+ (le32_to_cpu(raw_super->segment_count_nat) / 2)
+ * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
+ sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
+ sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
+ sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
+ sbi->cur_victim_sec = NULL_SECNO;
+ sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
+
+ for (i = 0; i < NR_COUNT_TYPE; i++)
+ atomic_set(&sbi->nr_pages[i], 0);
+
+ sbi->dir_level = DEF_DIR_LEVEL;
+ clear_sbi_flag(sbi, SBI_NEED_FSCK);
+}
+
+/*
+ * Read f2fs raw super block.
+ * Because we have two copies of super block, so read the first one at first,
+ * if the first one is invalid, move to read the second one.
+ */
+static int read_raw_super_block(struct super_block *sb,
+ struct f2fs_super_block **raw_super,
+ struct buffer_head **raw_super_buf)
+{
+ int block = 0;
+
+retry:
+ *raw_super_buf = sb_bread(sb, block);
+ if (!*raw_super_buf) {
+ f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
+ block + 1);
+ if (block == 0) {
+ block++;
+ goto retry;
+ } else {
+ return -EIO;
+ }
+ }
+
+ *raw_super = (struct f2fs_super_block *)
+ ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
+
+ /* sanity checking of raw super */
+ if (sanity_check_raw_super(sb, *raw_super)) {
+ brelse(*raw_super_buf);
+ f2fs_msg(sb, KERN_ERR,
+ "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
+ if (block == 0) {
+ block++;
+ goto retry;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct f2fs_sb_info *sbi;
+ struct f2fs_super_block *raw_super = NULL;
+ struct buffer_head *raw_super_buf;
+ struct inode *root;
+ long err = -EINVAL;
+ bool retry = true;
+ char *options = NULL;
+ int i;
+
+try_onemore:
+ /* allocate memory for f2fs-specific super block info */
+ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+
+ /* set a block size */
+ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
+ f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
+ goto free_sbi;
+ }
+
+ err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
+ if (err)
+ goto free_sbi;
+
+ sb->s_fs_info = sbi;
+ /* init some FS parameters */
+ sbi->active_logs = NR_CURSEG_TYPE;
+
+ set_opt(sbi, BG_GC);
+
+#ifdef CONFIG_F2FS_FS_XATTR
+ set_opt(sbi, XATTR_USER);
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ set_opt(sbi, POSIX_ACL);
+#endif
+ /* parse mount options */
+ options = kstrdup((const char *)data, GFP_KERNEL);
+ if (data && !options) {
+ err = -ENOMEM;
+ goto free_sb_buf;
+ }
+
+ err = parse_options(sb, options);
+ if (err)
+ goto free_options;
+
+ sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
+ sb->s_max_links = F2FS_LINK_MAX;
+ get_random_bytes(&sbi->s_next_generation, sizeof(u32));
+
+ sb->s_op = &f2fs_sops;
+ sb->s_xattr = f2fs_xattr_handlers;
+ sb->s_export_op = &f2fs_export_ops;
+ sb->s_magic = F2FS_SUPER_MAGIC;
+ sb->s_time_gran = 1;
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+ memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+
+ /* init f2fs-specific super block info */
+ sbi->sb = sb;
+ sbi->raw_super = raw_super;
+ sbi->raw_super_buf = raw_super_buf;
+ mutex_init(&sbi->gc_mutex);
+ mutex_init(&sbi->writepages);
+ mutex_init(&sbi->cp_mutex);
+ init_rwsem(&sbi->node_write);
+ clear_sbi_flag(sbi, SBI_POR_DOING);
+ spin_lock_init(&sbi->stat_lock);
+
+ init_rwsem(&sbi->read_io.io_rwsem);
+ sbi->read_io.sbi = sbi;
+ sbi->read_io.bio = NULL;
+ for (i = 0; i < NR_PAGE_TYPE; i++) {
+ init_rwsem(&sbi->write_io[i].io_rwsem);
+ sbi->write_io[i].sbi = sbi;
+ sbi->write_io[i].bio = NULL;
+ }
+
+ init_rwsem(&sbi->cp_rwsem);
+ init_waitqueue_head(&sbi->cp_wait);
+ init_sb_info(sbi);
+
+ /* get an inode for meta space */
+ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
+ if (IS_ERR(sbi->meta_inode)) {
+ f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
+ err = PTR_ERR(sbi->meta_inode);
+ goto free_options;
+ }
+
+ err = get_valid_checkpoint(sbi);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
+ goto free_meta_inode;
+ }
+
+ /* sanity checking of checkpoint */
+ err = -EINVAL;
+ if (sanity_check_ckpt(sbi)) {
+ f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
+ goto free_cp;
+ }
+
+ sbi->total_valid_node_count =
+ le32_to_cpu(sbi->ckpt->valid_node_count);
+ sbi->total_valid_inode_count =
+ le32_to_cpu(sbi->ckpt->valid_inode_count);
+ sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
+ sbi->total_valid_block_count =
+ le64_to_cpu(sbi->ckpt->valid_block_count);
+ sbi->last_valid_block_count = sbi->total_valid_block_count;
+ sbi->alloc_valid_block_count = 0;
+ INIT_LIST_HEAD(&sbi->dir_inode_list);
+ spin_lock_init(&sbi->dir_inode_lock);
+
+ init_ino_entry_info(sbi);
+
+ /* setup f2fs internal modules */
+ err = build_segment_manager(sbi);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Failed to initialize F2FS segment manager");
+ goto free_sm;
+ }
+ err = build_node_manager(sbi);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Failed to initialize F2FS node manager");
+ goto free_nm;
+ }
+
+ build_gc_manager(sbi);
+
+ /* get an inode for node space */
+ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
+ if (IS_ERR(sbi->node_inode)) {
+ f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
+ err = PTR_ERR(sbi->node_inode);
+ goto free_nm;
+ }
+
+ /* if there are nt orphan nodes free them */
+ recover_orphan_inodes(sbi);
+
+ /* read root inode and dentry */
+ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
+ if (IS_ERR(root)) {
+ f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
+ err = PTR_ERR(root);
+ goto free_node_inode;
+ }
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ iput(root);
+ err = -EINVAL;
+ goto free_node_inode;
+ }
+
+ sb->s_root = d_make_root(root); /* allocate root dentry */
+ if (!sb->s_root) {
+ err = -ENOMEM;
+ goto free_root_inode;
+ }
+
+ err = f2fs_build_stats(sbi);
+ if (err)
+ goto free_root_inode;
+
+ if (f2fs_proc_root)
+ sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+ if (sbi->s_proc)
+ proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_segment_info_fops, sb);
+
+ if (test_opt(sbi, DISCARD)) {
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ if (!blk_queue_discard(q))
+ f2fs_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but "
+ "the device does not support discard");
+ }
+
+ sbi->s_kobj.kset = f2fs_kset;
+ init_completion(&sbi->s_kobj_unregister);
+ err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+ "%s", sb->s_id);
+ if (err)
+ goto free_proc;
+
+ if (!retry)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
+ /* recover fsynced data */
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+ /*
+ * mount should be failed, when device has readonly mode, and
+ * previous checkpoint was not done by clean system shutdown.
+ */
+ if (bdev_read_only(sb->s_bdev) &&
+ !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) {
+ err = -EROFS;
+ goto free_kobj;
+ }
+ err = recover_fsync_data(sbi);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Cannot recover all fsync data errno=%ld", err);
+ goto free_kobj;
+ }
+ }
+
+ /*
+ * If filesystem is not mounted as read-only then
+ * do start the gc_thread.
+ */
+ if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
+ /* After POR, we can run background GC thread.*/
+ err = start_gc_thread(sbi);
+ if (err)
+ goto free_kobj;
+ }
+ kfree(options);
+ return 0;
+
+free_kobj:
+ kobject_del(&sbi->s_kobj);
+free_proc:
+ if (sbi->s_proc) {
+ remove_proc_entry("segment_info", sbi->s_proc);
+ remove_proc_entry(sb->s_id, f2fs_proc_root);
+ }
+ f2fs_destroy_stats(sbi);
+free_root_inode:
+ dput(sb->s_root);
+ sb->s_root = NULL;
+free_node_inode:
+ iput(sbi->node_inode);
+free_nm:
+ destroy_node_manager(sbi);
+free_sm:
+ destroy_segment_manager(sbi);
+free_cp:
+ kfree(sbi->ckpt);
+free_meta_inode:
+ make_bad_inode(sbi->meta_inode);
+ iput(sbi->meta_inode);
+free_options:
+ kfree(options);
+free_sb_buf:
+ brelse(raw_super_buf);
+free_sbi:
+ kfree(sbi);
+
+ /* give only one another chance */
+ if (retry) {
+ retry = 0;
+ shrink_dcache_sb(sb);
+ goto try_onemore;
+ }
+ return err;
+}
+
+static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
+}
+
+static void kill_f2fs_super(struct super_block *sb)
+{
+ if (sb->s_root)
+ set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
+ kill_block_super(sb);
+}
+
+static struct file_system_type f2fs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "f2fs",
+ .mount = f2fs_mount,
+ .kill_sb = kill_f2fs_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
+static int __init init_inodecache(void)
+{
+ f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
+ sizeof(struct f2fs_inode_info));
+ if (!f2fs_inode_cachep)
+ return -ENOMEM;
+ return 0;
+}
+
+static void destroy_inodecache(void)
+{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(f2fs_inode_cachep);
+}
+
+static int __init init_f2fs_fs(void)
+{
+ int err;
+
+ f2fs_build_trace_ios();
+
+ err = init_inodecache();
+ if (err)
+ goto fail;
+ err = create_node_manager_caches();
+ if (err)
+ goto free_inodecache;
+ err = create_segment_manager_caches();
+ if (err)
+ goto free_node_manager_caches;
+ err = create_checkpoint_caches();
+ if (err)
+ goto free_segment_manager_caches;
+ f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+ if (!f2fs_kset) {
+ err = -ENOMEM;
+ goto free_checkpoint_caches;
+ }
+ err = register_filesystem(&f2fs_fs_type);
+ if (err)
+ goto free_kset;
+ f2fs_create_root_stats();
+ f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+ return 0;
+
+free_kset:
+ kset_unregister(f2fs_kset);
+free_checkpoint_caches:
+ destroy_checkpoint_caches();
+free_segment_manager_caches:
+ destroy_segment_manager_caches();
+free_node_manager_caches:
+ destroy_node_manager_caches();
+free_inodecache:
+ destroy_inodecache();
+fail:
+ return err;
+}
+
+static void __exit exit_f2fs_fs(void)
+{
+ remove_proc_entry("fs/f2fs", NULL);
+ f2fs_destroy_root_stats();
+ unregister_filesystem(&f2fs_fs_type);
+ destroy_checkpoint_caches();
+ destroy_segment_manager_caches();
+ destroy_node_manager_caches();
+ destroy_inodecache();
+ kset_unregister(f2fs_kset);
+ f2fs_destroy_trace_ios();
+}
+
+module_init(init_f2fs_fs)
+module_exit(exit_f2fs_fs)
+
+MODULE_AUTHOR("Samsung Electronics's Praesto Team");
+MODULE_DESCRIPTION("Flash Friendly File System");
+MODULE_LICENSE("GPL");
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
new file mode 100644
index 0000000..875aa81
--- /dev/null
+++ b/fs/f2fs/trace.c
@@ -0,0 +1,159 @@
+/*
+ * f2fs IO tracer
+ *
+ * Copyright (c) 2014 Motorola Mobility
+ * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include <linux/sched.h>
+#include <linux/radix-tree.h>
+
+#include "f2fs.h"
+#include "trace.h"
+
+static RADIX_TREE(pids, GFP_ATOMIC);
+static spinlock_t pids_lock;
+static struct last_io_info last_io;
+
+static inline void __print_last_io(void)
+{
+ if (!last_io.len)
+ return;
+
+ trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n",
+ last_io.major, last_io.minor,
+ last_io.pid, "----------------",
+ last_io.type,
+ last_io.fio.rw, last_io.fio.blk_addr,
+ last_io.len);
+ memset(&last_io, 0, sizeof(last_io));
+}
+
+static int __file_type(struct inode *inode, pid_t pid)
+{
+ if (f2fs_is_atomic_file(inode))
+ return __ATOMIC_FILE;
+ else if (f2fs_is_volatile_file(inode))
+ return __VOLATILE_FILE;
+ else if (S_ISDIR(inode->i_mode))
+ return __DIR_FILE;
+ else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode)))
+ return __NODE_FILE;
+ else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode)))
+ return __META_FILE;
+ else if (pid)
+ return __NORMAL_FILE;
+ else
+ return __MISC_FILE;
+}
+
+void f2fs_trace_pid(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ pid_t pid = task_pid_nr(current);
+ void *p;
+
+ page->private = pid;
+
+ if (radix_tree_preload(GFP_NOFS))
+ return;
+
+ spin_lock(&pids_lock);
+ p = radix_tree_lookup(&pids, pid);
+ if (p == current)
+ goto out;
+ if (p)
+ radix_tree_delete(&pids, pid);
+
+ f2fs_radix_tree_insert(&pids, pid, current);
+
+ trace_printk("%3x:%3x %4x %-16s\n",
+ MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+ pid, current->comm);
+out:
+ spin_unlock(&pids_lock);
+ radix_tree_preload_end();
+}
+
+void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
+{
+ struct inode *inode;
+ pid_t pid;
+ int major, minor;
+
+ if (flush) {
+ __print_last_io();
+ return;
+ }
+
+ inode = page->mapping->host;
+ pid = page_private(page);
+
+ major = MAJOR(inode->i_sb->s_dev);
+ minor = MINOR(inode->i_sb->s_dev);
+
+ if (last_io.major == major && last_io.minor == minor &&
+ last_io.pid == pid &&
+ last_io.type == __file_type(inode, pid) &&
+ last_io.fio.rw == fio->rw &&
+ last_io.fio.blk_addr + last_io.len == fio->blk_addr) {
+ last_io.len++;
+ return;
+ }
+
+ __print_last_io();
+
+ last_io.major = major;
+ last_io.minor = minor;
+ last_io.pid = pid;
+ last_io.type = __file_type(inode, pid);
+ last_io.fio = *fio;
+ last_io.len = 1;
+ return;
+}
+
+void f2fs_build_trace_ios(void)
+{
+ spin_lock_init(&pids_lock);
+}
+
+#define PIDVEC_SIZE 128
+static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
+ unsigned int max_items)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned int ret = 0;
+
+ if (unlikely(!max_items))
+ return 0;
+
+ radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
+ results[ret] = iter.index;
+ if (++ret == PIDVEC_SIZE)
+ break;
+ }
+ return ret;
+}
+
+void f2fs_destroy_trace_ios(void)
+{
+ pid_t pid[PIDVEC_SIZE];
+ pid_t next_pid = 0;
+ unsigned int found;
+
+ spin_lock(&pids_lock);
+ while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
+ unsigned idx;
+
+ next_pid = pid[found - 1] + 1;
+ for (idx = 0; idx < found; idx++)
+ radix_tree_delete(&pids, pid[idx]);
+ }
+ spin_unlock(&pids_lock);
+}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
new file mode 100644
index 0000000..1041dbe
--- /dev/null
+++ b/fs/f2fs/trace.h
@@ -0,0 +1,46 @@
+/*
+ * f2fs IO tracer
+ *
+ * Copyright (c) 2014 Motorola Mobility
+ * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __F2FS_TRACE_H__
+#define __F2FS_TRACE_H__
+
+#ifdef CONFIG_F2FS_IO_TRACE
+#include <trace/events/f2fs.h>
+
+enum file_type {
+ __NORMAL_FILE,
+ __DIR_FILE,
+ __NODE_FILE,
+ __META_FILE,
+ __ATOMIC_FILE,
+ __VOLATILE_FILE,
+ __MISC_FILE,
+};
+
+struct last_io_info {
+ int major, minor;
+ pid_t pid;
+ enum file_type type;
+ struct f2fs_io_info fio;
+ block_t len;
+};
+
+extern void f2fs_trace_pid(struct page *);
+extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int);
+extern void f2fs_build_trace_ios(void);
+extern void f2fs_destroy_trace_ios(void);
+#else
+#define f2fs_trace_pid(p)
+#define f2fs_trace_ios(p, i, n)
+#define f2fs_build_trace_ios()
+#define f2fs_destroy_trace_ios()
+
+#endif
+#endif /* __F2FS_TRACE_H__ */
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
new file mode 100644
index 0000000..2f49a58
--- /dev/null
+++ b/fs/f2fs/xattr.c
@@ -0,0 +1,615 @@
+/*
+ * fs/f2fs/xattr.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/xattr.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
+ * Extended attributes for symlinks and special files added per
+ * suggestion of Luka Renko <luka.renko@hermes.si>.
+ * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
+ * Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/rwsem.h>
+#include <linux/f2fs_fs.h>
+#include <linux/security.h>
+#include "f2fs.h"
+#include "xattr.h"
+
+static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t len, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+ int total_len, prefix_len = 0;
+ const char *prefix = NULL;
+
+ switch (type) {
+ case F2FS_XATTR_INDEX_USER:
+ if (!test_opt(sbi, XATTR_USER))
+ return -EOPNOTSUPP;
+ prefix = XATTR_USER_PREFIX;
+ prefix_len = XATTR_USER_PREFIX_LEN;
+ break;
+ case F2FS_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ prefix = XATTR_TRUSTED_PREFIX;
+ prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+ break;
+ case F2FS_XATTR_INDEX_SECURITY:
+ prefix = XATTR_SECURITY_PREFIX;
+ prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ total_len = prefix_len + len + 1;
+ if (list && total_len <= list_size) {
+ memcpy(list, prefix, prefix_len);
+ memcpy(list + prefix_len, name, len);
+ list[prefix_len + len] = '\0';
+ }
+ return total_len;
+}
+
+static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+
+ switch (type) {
+ case F2FS_XATTR_INDEX_USER:
+ if (!test_opt(sbi, XATTR_USER))
+ return -EOPNOTSUPP;
+ break;
+ case F2FS_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ break;
+ case F2FS_XATTR_INDEX_SECURITY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+ return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL);
+}
+
+static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
+
+ switch (type) {
+ case F2FS_XATTR_INDEX_USER:
+ if (!test_opt(sbi, XATTR_USER))
+ return -EOPNOTSUPP;
+ break;
+ case F2FS_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ break;
+ case F2FS_XATTR_INDEX_SECURITY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ return f2fs_setxattr(dentry->d_inode, type, name,
+ value, size, NULL, flags);
+}
+
+static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t len, int type)
+{
+ const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
+ size_t size;
+
+ if (type != F2FS_XATTR_INDEX_ADVISE)
+ return 0;
+
+ size = strlen(xname) + 1;
+ if (list && size <= list_size)
+ memcpy(list, xname, size);
+ return size;
+}
+
+static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
+{
+ struct inode *inode = dentry->d_inode;
+
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+
+ *((char *)buffer) = F2FS_I(inode)->i_advise;
+ return sizeof(char);
+}
+
+static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ struct inode *inode = dentry->d_inode;
+
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+ if (value == NULL)
+ return -EINVAL;
+
+ F2FS_I(inode)->i_advise |= *(char *)value;
+ return 0;
+}
+
+#ifdef CONFIG_F2FS_FS_SECURITY
+static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *page)
+{
+ const struct xattr *xattr;
+ int err = 0;
+
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
+ xattr->name, xattr->value,
+ xattr->value_len, (struct page *)page, 0);
+ if (err < 0)
+ break;
+ }
+ return err;
+}
+
+int f2fs_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr, struct page *ipage)
+{
+ return security_inode_init_security(inode, dir, qstr,
+ &f2fs_initxattrs, ipage);
+}
+#endif
+
+const struct xattr_handler f2fs_xattr_user_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .flags = F2FS_XATTR_INDEX_USER,
+ .list = f2fs_xattr_generic_list,
+ .get = f2fs_xattr_generic_get,
+ .set = f2fs_xattr_generic_set,
+};
+
+const struct xattr_handler f2fs_xattr_trusted_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .flags = F2FS_XATTR_INDEX_TRUSTED,
+ .list = f2fs_xattr_generic_list,
+ .get = f2fs_xattr_generic_get,
+ .set = f2fs_xattr_generic_set,
+};
+
+const struct xattr_handler f2fs_xattr_advise_handler = {
+ .prefix = F2FS_SYSTEM_ADVISE_PREFIX,
+ .flags = F2FS_XATTR_INDEX_ADVISE,
+ .list = f2fs_xattr_advise_list,
+ .get = f2fs_xattr_advise_get,
+ .set = f2fs_xattr_advise_set,
+};
+
+const struct xattr_handler f2fs_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .flags = F2FS_XATTR_INDEX_SECURITY,
+ .list = f2fs_xattr_generic_list,
+ .get = f2fs_xattr_generic_get,
+ .set = f2fs_xattr_generic_set,
+};
+
+static const struct xattr_handler *f2fs_xattr_handler_map[] = {
+ [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
+ [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
+#endif
+ [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
+#ifdef CONFIG_F2FS_FS_SECURITY
+ [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler,
+#endif
+ [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
+};
+
+const struct xattr_handler *f2fs_xattr_handlers[] = {
+ &f2fs_xattr_user_handler,
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+ &f2fs_xattr_acl_access_handler,
+ &f2fs_xattr_acl_default_handler,
+#endif
+ &f2fs_xattr_trusted_handler,
+#ifdef CONFIG_F2FS_FS_SECURITY
+ &f2fs_xattr_security_handler,
+#endif
+ &f2fs_xattr_advise_handler,
+ NULL,
+};
+
+static inline const struct xattr_handler *f2fs_xattr_handler(int index)
+{
+ const struct xattr_handler *handler = NULL;
+
+ if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
+ handler = f2fs_xattr_handler_map[index];
+ return handler;
+}
+
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
+ size_t len, const char *name)
+{
+ struct f2fs_xattr_entry *entry;
+
+ list_for_each_xattr(entry, base_addr) {
+ if (entry->e_name_index != index)
+ continue;
+ if (entry->e_name_len != len)
+ continue;
+ if (!memcmp(entry->e_name, name, len))
+ break;
+ }
+ return entry;
+}
+
+static void *read_all_xattrs(struct inode *inode, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_xattr_header *header;
+ size_t size = PAGE_SIZE, inline_size = 0;
+ void *txattr_addr;
+
+ inline_size = inline_xattr_size(inode);
+
+ txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
+ if (!txattr_addr)
+ return NULL;
+
+ /* read from inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ goto fail;
+ inline_addr = inline_xattr_addr(page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+ }
+
+ /* read from xattr node block */
+ if (F2FS_I(inode)->i_xattr_nid) {
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ if (IS_ERR(xpage))
+ goto fail;
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
+ f2fs_put_page(xpage, 1);
+ }
+
+ header = XATTR_HDR(txattr_addr);
+
+ /* never been allocated xattrs */
+ if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
+ header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
+ header->h_refcount = cpu_to_le32(1);
+ }
+ return txattr_addr;
+fail:
+ kzfree(txattr_addr);
+ return NULL;
+}
+
+static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
+ void *txattr_addr, struct page *ipage)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ size_t inline_size = 0;
+ void *xattr_addr;
+ struct page *xpage;
+ nid_t new_nid = 0;
+ int err;
+
+ inline_size = inline_xattr_size(inode);
+
+ if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
+ if (!alloc_nid(sbi, &new_nid))
+ return -ENOSPC;
+
+ /* write to inline xattr */
+ if (inline_size) {
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(page);
+ }
+ inline_addr = inline_xattr_addr(page);
+ f2fs_wait_on_page_writeback(page, NODE);
+ }
+ memcpy(inline_addr, txattr_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ /* no need to use xattr node block */
+ if (hsize <= inline_size) {
+ err = truncate_xattr_node(inode, ipage);
+ alloc_nid_failed(sbi, new_nid);
+ return err;
+ }
+ }
+
+ /* write to xattr node block */
+ if (F2FS_I(inode)->i_xattr_nid) {
+ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
+ if (IS_ERR(xpage)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(xpage);
+ }
+ f2fs_bug_on(sbi, new_nid);
+ f2fs_wait_on_page_writeback(xpage, NODE);
+ } else {
+ struct dnode_of_data dn;
+ set_new_dnode(&dn, inode, NULL, NULL, new_nid);
+ xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
+ if (IS_ERR(xpage)) {
+ alloc_nid_failed(sbi, new_nid);
+ return PTR_ERR(xpage);
+ }
+ alloc_nid_done(sbi, new_nid);
+ }
+
+ xattr_addr = page_address(xpage);
+ memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
+ sizeof(struct node_footer));
+ set_page_dirty(xpage);
+ f2fs_put_page(xpage, 1);
+
+ /* need to checkpoint during fsync */
+ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
+ return 0;
+}
+
+int f2fs_getxattr(struct inode *inode, int index, const char *name,
+ void *buffer, size_t buffer_size, struct page *ipage)
+{
+ struct f2fs_xattr_entry *entry;
+ void *base_addr;
+ int error = 0;
+ size_t size, len;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ len = strlen(name);
+ if (len > F2FS_NAME_LEN)
+ return -ERANGE;
+
+ base_addr = read_all_xattrs(inode, ipage);
+ if (!base_addr)
+ return -ENOMEM;
+
+ entry = __find_xattr(base_addr, index, len, name);
+ if (IS_XATTR_LAST_ENTRY(entry)) {
+ error = -ENODATA;
+ goto cleanup;
+ }
+
+ size = le16_to_cpu(entry->e_value_size);
+
+ if (buffer && size > buffer_size) {
+ error = -ERANGE;
+ goto cleanup;
+ }
+
+ if (buffer) {
+ char *pval = entry->e_name + entry->e_name_len;
+ memcpy(buffer, pval, size);
+ }
+ error = size;
+
+cleanup:
+ kzfree(base_addr);
+ return error;
+}
+
+ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct f2fs_xattr_entry *entry;
+ void *base_addr;
+ int error = 0;
+ size_t rest = buffer_size;
+
+ base_addr = read_all_xattrs(inode, NULL);
+ if (!base_addr)
+ return -ENOMEM;
+
+ list_for_each_xattr(entry, base_addr) {
+ const struct xattr_handler *handler =
+ f2fs_xattr_handler(entry->e_name_index);
+ size_t size;
+
+ if (!handler)
+ continue;
+
+ size = handler->list(dentry, buffer, rest, entry->e_name,
+ entry->e_name_len, handler->flags);
+ if (buffer && size > rest) {
+ error = -ERANGE;
+ goto cleanup;
+ }
+
+ if (buffer)
+ buffer += size;
+ rest -= size;
+ }
+ error = buffer_size - rest;
+cleanup:
+ kzfree(base_addr);
+ return error;
+}
+
+static int __f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size,
+ struct page *ipage, int flags)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_xattr_entry *here, *last;
+ void *base_addr;
+ int found, newsize;
+ size_t len;
+ __u32 new_hsize;
+ int error = -ENOMEM;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (value == NULL)
+ size = 0;
+
+ len = strlen(name);
+
+ if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
+ return -ERANGE;
+
+ base_addr = read_all_xattrs(inode, ipage);
+ if (!base_addr)
+ goto exit;
+
+ /* find entry with wanted name. */
+ here = __find_xattr(base_addr, index, len, name);
+
+ found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
+
+ if ((flags & XATTR_REPLACE) && !found) {
+ error = -ENODATA;
+ goto exit;
+ } else if ((flags & XATTR_CREATE) && found) {
+ error = -EEXIST;
+ goto exit;
+ }
+
+ last = here;
+ while (!IS_XATTR_LAST_ENTRY(last))
+ last = XATTR_NEXT_ENTRY(last);
+
+ newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
+
+ /* 1. Check space */
+ if (value) {
+ int free;
+ /*
+ * If value is NULL, it is remove operation.
+ * In case of update operation, we calculate free.
+ */
+ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
+ if (found)
+ free = free + ENTRY_SIZE(here);
+
+ if (unlikely(free < newsize)) {
+ error = -ENOSPC;
+ goto exit;
+ }
+ }
+
+ /* 2. Remove old entry */
+ if (found) {
+ /*
+ * If entry is found, remove old entry.
+ * If not found, remove operation is not needed.
+ */
+ struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
+ int oldsize = ENTRY_SIZE(here);
+
+ memmove(here, next, (char *)last - (char *)next);
+ last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
+ memset(last, 0, oldsize);
+ }
+
+ new_hsize = (char *)last - (char *)base_addr;
+
+ /* 3. Write new entry */
+ if (value) {
+ char *pval;
+ /*
+ * Before we come here, old entry is removed.
+ * We just write new entry.
+ */
+ memset(last, 0, newsize);
+ last->e_name_index = index;
+ last->e_name_len = len;
+ memcpy(last->e_name, name, len);
+ pval = last->e_name + len;
+ memcpy(pval, value, size);
+ last->e_value_size = cpu_to_le16(size);
+ new_hsize += newsize;
+ }
+
+ error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
+ if (error)
+ goto exit;
+
+ if (is_inode_flag_set(fi, FI_ACL_MODE)) {
+ inode->i_mode = fi->i_acl_mode;
+ inode->i_ctime = CURRENT_TIME;
+ clear_inode_flag(fi, FI_ACL_MODE);
+ }
+
+ if (ipage)
+ update_inode(inode, ipage);
+ else
+ update_inode_page(inode);
+exit:
+ kzfree(base_addr);
+ return error;
+}
+
+int f2fs_setxattr(struct inode *inode, int index, const char *name,
+ const void *value, size_t size,
+ struct page *ipage, int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int err;
+
+ /* this case is only from init_inode_metadata */
+ if (ipage)
+ return __f2fs_setxattr(inode, index, name, value,
+ size, ipage, flags);
+ f2fs_balance_fs(sbi);
+
+ f2fs_lock_op(sbi);
+ /* protect xattr_ver */
+ down_write(&F2FS_I(inode)->i_sem);
+ err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
+ up_write(&F2FS_I(inode)->i_sem);
+ f2fs_unlock_op(sbi);
+
+ return err;
+}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
new file mode 100644
index 0000000..95b55a0
--- /dev/null
+++ b/fs/f2fs/xattr.h
@@ -0,0 +1,154 @@
+/*
+ * fs/f2fs/xattr.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/xattr.h
+ *
+ * On-disk format of extended attributes for the ext2 filesystem.
+ *
+ * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __F2FS_XATTR_H__
+#define __F2FS_XATTR_H__
+
+#include <linux/init.h>
+#include <linux/xattr.h>
+
+/* Magic value in attribute blocks */
+#define F2FS_XATTR_MAGIC 0xF2F52011
+
+/* Maximum number of references to one attribute block */
+#define F2FS_XATTR_REFCOUNT_MAX 1024
+
+/* Name indexes */
+#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise"
+#define F2FS_XATTR_INDEX_USER 1
+#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2
+#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define F2FS_XATTR_INDEX_TRUSTED 4
+#define F2FS_XATTR_INDEX_LUSTRE 5
+#define F2FS_XATTR_INDEX_SECURITY 6
+#define F2FS_XATTR_INDEX_ADVISE 7
+
+struct f2fs_xattr_header {
+ __le32 h_magic; /* magic number for identification */
+ __le32 h_refcount; /* reference count */
+ __u32 h_reserved[4]; /* zero right now */
+};
+
+struct f2fs_xattr_entry {
+ __u8 e_name_index;
+ __u8 e_name_len;
+ __le16 e_value_size; /* size of attribute value */
+ char e_name[0]; /* attribute name */
+};
+
+#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr))
+#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr))
+#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1))
+#define XATTR_ROUND (3)
+
+#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
+
+#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
+ entry->e_name_len + le16_to_cpu(entry->e_value_size)))
+
+#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\
+ ENTRY_SIZE(entry)))
+
+#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+
+#define list_for_each_xattr(entry, addr) \
+ for (entry = XATTR_FIRST_ENTRY(addr);\
+ !IS_XATTR_LAST_ENTRY(entry);\
+ entry = XATTR_NEXT_ENTRY(entry))
+
+#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
+ sizeof(struct node_footer) - sizeof(__u32))
+
+#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
+ sizeof(struct f2fs_xattr_header) - \
+ sizeof(struct f2fs_xattr_entry))
+
+/*
+ * On-disk structure of f2fs_xattr
+ * We use inline xattrs space + 1 block for xattr.
+ *
+ * +--------------------+
+ * | f2fs_xattr_header |
+ * | |
+ * +--------------------+
+ * | f2fs_xattr_entry |
+ * | .e_name_index = 1 |
+ * | .e_name_len = 3 |
+ * | .e_value_size = 14 |
+ * | .e_name = "foo" |
+ * | "value_of_xattr" |<- value_offs = e_name + e_name_len
+ * +--------------------+
+ * | f2fs_xattr_entry |
+ * | .e_name_index = 4 |
+ * | .e_name = "bar" |
+ * +--------------------+
+ * | |
+ * | Free |
+ * | |
+ * +--------------------+<- MIN_OFFSET
+ * | node_footer |
+ * | (nid, ino, offset) |
+ * +--------------------+
+ *
+ **/
+
+#ifdef CONFIG_F2FS_FS_XATTR
+extern const struct xattr_handler f2fs_xattr_user_handler;
+extern const struct xattr_handler f2fs_xattr_trusted_handler;
+extern const struct xattr_handler f2fs_xattr_acl_access_handler;
+extern const struct xattr_handler f2fs_xattr_acl_default_handler;
+extern const struct xattr_handler f2fs_xattr_advise_handler;
+extern const struct xattr_handler f2fs_xattr_security_handler;
+
+extern const struct xattr_handler *f2fs_xattr_handlers[];
+
+extern int f2fs_setxattr(struct inode *, int, const char *,
+ const void *, size_t, struct page *, int);
+extern int f2fs_getxattr(struct inode *, int, const char *, void *,
+ size_t, struct page *);
+extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
+#else
+
+#define f2fs_xattr_handlers NULL
+static inline int f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size, int flags)
+{
+ return -EOPNOTSUPP;
+}
+static inline int f2fs_getxattr(struct inode *inode, int index,
+ const char *name, void *buffer,
+ size_t buffer_size, struct page *dpage)
+{
+ return -EOPNOTSUPP;
+}
+static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
+ size_t buffer_size)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#ifdef CONFIG_F2FS_FS_SECURITY
+extern int f2fs_init_security(struct inode *, struct inode *,
+ const struct qstr *, struct page *);
+#else
+static inline int f2fs_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr, struct page *ipage)
+{
+ return 0;
+}
+#endif
+#endif /* __F2FS_XATTR_H__ */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 0000000..a23556c
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,475 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include <linux/pagemap.h>
+#include <linux/types.h>
+
+#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */
+#define F2FS_MIN_LOG_SECTOR_SIZE 9 /* 9 bits for 512 bytes */
+#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */
+#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */
+#define F2FS_BLKSIZE 4096 /* support only 4KB block */
+#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */
+#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
+#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
+
+#define NULL_ADDR ((block_t)0) /* used as block_t addresses */
+#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
+
+#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
+#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
+
+/* 0, 1(node nid), 2(meta nid) are reserved node id */
+#define F2FS_RESERVED_NODE_NUM 3
+
+#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num)
+#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
+#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
+
+/* This flag is used by node and meta inodes, and by recovery */
+#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
+#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
+
+/*
+ * For further optimization on multi-head logs, on-disk layout supports maximum
+ * 16 logs by default. The number, 16, is expected to cover all the cases
+ * enoughly. The implementaion currently uses no more than 6 logs.
+ * Half the logs are used for nodes, and the other half are used for data.
+ */
+#define MAX_ACTIVE_LOGS 16
+#define MAX_ACTIVE_NODE_LOGS 8
+#define MAX_ACTIVE_DATA_LOGS 8
+
+/*
+ * For superblock
+ */
+struct f2fs_super_block {
+ __le32 magic; /* Magic Number */
+ __le16 major_ver; /* Major Version */
+ __le16 minor_ver; /* Minor Version */
+ __le32 log_sectorsize; /* log2 sector size in bytes */
+ __le32 log_sectors_per_block; /* log2 # of sectors per block */
+ __le32 log_blocksize; /* log2 block size in bytes */
+ __le32 log_blocks_per_seg; /* log2 # of blocks per segment */
+ __le32 segs_per_sec; /* # of segments per section */
+ __le32 secs_per_zone; /* # of sections per zone */
+ __le32 checksum_offset; /* checksum offset inside super block */
+ __le64 block_count; /* total # of user blocks */
+ __le32 section_count; /* total # of sections */
+ __le32 segment_count; /* total # of segments */
+ __le32 segment_count_ckpt; /* # of segments for checkpoint */
+ __le32 segment_count_sit; /* # of segments for SIT */
+ __le32 segment_count_nat; /* # of segments for NAT */
+ __le32 segment_count_ssa; /* # of segments for SSA */
+ __le32 segment_count_main; /* # of segments for main area */
+ __le32 segment0_blkaddr; /* start block address of segment 0 */
+ __le32 cp_blkaddr; /* start block address of checkpoint */
+ __le32 sit_blkaddr; /* start block address of SIT */
+ __le32 nat_blkaddr; /* start block address of NAT */
+ __le32 ssa_blkaddr; /* start block address of SSA */
+ __le32 main_blkaddr; /* start block address of main area */
+ __le32 root_ino; /* root inode number */
+ __le32 node_ino; /* node inode number */
+ __le32 meta_ino; /* meta inode number */
+ __u8 uuid[16]; /* 128-bit uuid for volume */
+ __le16 volume_name[512]; /* volume name */
+ __le32 extension_count; /* # of extensions below */
+ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
+ __le32 cp_payload;
+} __packed;
+
+/*
+ * For checkpoint
+ */
+#define CP_FASTBOOT_FLAG 0x00000020
+#define CP_FSCK_FLAG 0x00000010
+#define CP_ERROR_FLAG 0x00000008
+#define CP_COMPACT_SUM_FLAG 0x00000004
+#define CP_ORPHAN_PRESENT_FLAG 0x00000002
+#define CP_UMOUNT_FLAG 0x00000001
+
+#define F2FS_CP_PACKS 2 /* # of checkpoint packs */
+
+struct f2fs_checkpoint {
+ __le64 checkpoint_ver; /* checkpoint block version number */
+ __le64 user_block_count; /* # of user blocks */
+ __le64 valid_block_count; /* # of valid blocks in main area */
+ __le32 rsvd_segment_count; /* # of reserved segments for gc */
+ __le32 overprov_segment_count; /* # of overprovision segments */
+ __le32 free_segment_count; /* # of free segments in main area */
+
+ /* information of current node segments */
+ __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
+ __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
+ /* information of current data segments */
+ __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
+ __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
+ __le32 ckpt_flags; /* Flags : umount and journal_present */
+ __le32 cp_pack_total_block_count; /* total # of one cp pack */
+ __le32 cp_pack_start_sum; /* start block number of data summary */
+ __le32 valid_node_count; /* Total number of valid nodes */
+ __le32 valid_inode_count; /* Total number of valid inodes */
+ __le32 next_free_nid; /* Next free node number */
+ __le32 sit_ver_bitmap_bytesize; /* Default value 64 */
+ __le32 nat_ver_bitmap_bytesize; /* Default value 256 */
+ __le32 checksum_offset; /* checksum offset inside cp block */
+ __le64 elapsed_time; /* mounted time */
+ /* allocation type of current segment */
+ unsigned char alloc_type[MAX_ACTIVE_LOGS];
+
+ /* SIT and NAT version bitmap */
+ unsigned char sit_nat_version_bitmap[1];
+} __packed;
+
+/*
+ * For orphan inode management
+ */
+#define F2FS_ORPHANS_PER_BLOCK 1020
+
+#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \
+ F2FS_ORPHANS_PER_BLOCK)
+
+struct f2fs_orphan_block {
+ __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */
+ __le32 reserved; /* reserved */
+ __le16 blk_addr; /* block index in current CP */
+ __le16 blk_count; /* Number of orphan inode blocks in CP */
+ __le32 entry_count; /* Total number of orphan nodes in current CP */
+ __le32 check_sum; /* CRC32 for orphan inode block */
+} __packed;
+
+/*
+ * For NODE structure
+ */
+struct f2fs_extent {
+ __le32 fofs; /* start file offset of the extent */
+ __le32 blk_addr; /* start block address of the extent */
+ __le32 len; /* lengh of the extent */
+} __packed;
+
+#define F2FS_NAME_LEN 255
+#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
+#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
+#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
+#define ADDRS_PER_INODE(fi) addrs_per_inode(fi)
+#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
+#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
+
+#define ADDRS_PER_PAGE(page, fi) \
+ (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+
+#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
+#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
+#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3)
+#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4)
+#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5)
+
+#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */
+#define F2FS_INLINE_DATA 0x02 /* file inline data flag */
+#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */
+#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */
+
+#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
+ F2FS_INLINE_XATTR_ADDRS - 1))
+
+struct f2fs_inode {
+ __le16 i_mode; /* file mode */
+ __u8 i_advise; /* file hints */
+ __u8 i_inline; /* file inline flags */
+ __le32 i_uid; /* user ID */
+ __le32 i_gid; /* group ID */
+ __le32 i_links; /* links count */
+ __le64 i_size; /* file size in bytes */
+ __le64 i_blocks; /* file size in blocks */
+ __le64 i_atime; /* access time */
+ __le64 i_ctime; /* change time */
+ __le64 i_mtime; /* modification time */
+ __le32 i_atime_nsec; /* access time in nano scale */
+ __le32 i_ctime_nsec; /* change time in nano scale */
+ __le32 i_mtime_nsec; /* modification time in nano scale */
+ __le32 i_generation; /* file version (for NFS) */
+ __le32 i_current_depth; /* only for directory depth */
+ __le32 i_xattr_nid; /* nid to save xattr */
+ __le32 i_flags; /* file attributes */
+ __le32 i_pino; /* parent inode number */
+ __le32 i_namelen; /* file name length */
+ __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */
+ __u8 i_dir_level; /* dentry_level for large dir */
+
+ struct f2fs_extent i_ext; /* caching a largest extent */
+
+ __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */
+
+ __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2),
+ double_indirect(1) node id */
+} __packed;
+
+struct direct_node {
+ __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */
+} __packed;
+
+struct indirect_node {
+ __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */
+} __packed;
+
+enum {
+ COLD_BIT_SHIFT = 0,
+ FSYNC_BIT_SHIFT,
+ DENT_BIT_SHIFT,
+ OFFSET_BIT_SHIFT
+};
+
+#define OFFSET_BIT_MASK (0x07) /* (0x01 << OFFSET_BIT_SHIFT) - 1 */
+
+struct node_footer {
+ __le32 nid; /* node id */
+ __le32 ino; /* inode nunmber */
+ __le32 flag; /* include cold/fsync/dentry marks and offset */
+ __le64 cp_ver; /* checkpoint version */
+ __le32 next_blkaddr; /* next node page block address */
+} __packed;
+
+struct f2fs_node {
+ /* can be one of three types: inode, direct, and indirect types */
+ union {
+ struct f2fs_inode i;
+ struct direct_node dn;
+ struct indirect_node in;
+ };
+ struct node_footer footer;
+} __packed;
+
+/*
+ * For NAT entries
+ */
+#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+
+struct f2fs_nat_entry {
+ __u8 version; /* latest version of cached nat entry */
+ __le32 ino; /* inode number */
+ __le32 block_addr; /* block address */
+} __packed;
+
+struct f2fs_nat_block {
+ struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For SIT entries
+ *
+ * Each segment is 2MB in size by default so that a bitmap for validity of
+ * there-in blocks should occupy 64 bytes, 512 bits.
+ * Not allow to change this.
+ */
+#define SIT_VBLOCK_MAP_SIZE 64
+#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+
+/*
+ * Note that f2fs_sit_entry->vblocks has the following bit-field information.
+ * [15:10] : allocation type such as CURSEG_XXXX_TYPE
+ * [9:0] : valid block count
+ */
+#define SIT_VBLOCKS_SHIFT 10
+#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1)
+#define GET_SIT_VBLOCKS(raw_sit) \
+ (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK)
+#define GET_SIT_TYPE(raw_sit) \
+ ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \
+ >> SIT_VBLOCKS_SHIFT)
+
+struct f2fs_sit_entry {
+ __le16 vblocks; /* reference above */
+ __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */
+ __le64 mtime; /* segment age for cleaning */
+} __packed;
+
+struct f2fs_sit_block {
+ struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For segment summary
+ *
+ * One summary block contains exactly 512 summary entries, which represents
+ * exactly 2MB segment by default. Not allow to change the basic units.
+ *
+ * NOTE: For initializing fields, you must use set_summary
+ *
+ * - If data page, nid represents dnode's nid
+ * - If node page, nid represents the node page's nid.
+ *
+ * The ofs_in_node is used by only data page. It represents offset
+ * from node's page's beginning to get a data block address.
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
+ */
+#define ENTRIES_IN_SUM 512
+#define SUMMARY_SIZE (7) /* sizeof(struct summary) */
+#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */
+#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM)
+
+/* a summary entry for a 4KB-sized block in a segment */
+struct f2fs_summary {
+ __le32 nid; /* parent node id */
+ union {
+ __u8 reserved[3];
+ struct {
+ __u8 version; /* node version number */
+ __le16 ofs_in_node; /* block index in parent node */
+ } __packed;
+ };
+} __packed;
+
+/* summary block type, node or data, is stored to the summary_footer */
+#define SUM_TYPE_NODE (1)
+#define SUM_TYPE_DATA (0)
+
+struct summary_footer {
+ unsigned char entry_type; /* SUM_TYPE_XXX */
+ __u32 check_sum; /* summary checksum */
+} __packed;
+
+#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
+ SUM_ENTRY_SIZE)
+#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\
+ sizeof(struct nat_journal_entry))
+#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\
+ sizeof(struct nat_journal_entry))
+#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\
+ sizeof(struct sit_journal_entry))
+#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\
+ sizeof(struct sit_journal_entry))
+/*
+ * frequently updated NAT/SIT entries can be stored in the spare area in
+ * summary blocks
+ */
+enum {
+ NAT_JOURNAL = 0,
+ SIT_JOURNAL
+};
+
+struct nat_journal_entry {
+ __le32 nid;
+ struct f2fs_nat_entry ne;
+} __packed;
+
+struct nat_journal {
+ struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
+ __u8 reserved[NAT_JOURNAL_RESERVED];
+} __packed;
+
+struct sit_journal_entry {
+ __le32 segno;
+ struct f2fs_sit_entry se;
+} __packed;
+
+struct sit_journal {
+ struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
+ __u8 reserved[SIT_JOURNAL_RESERVED];
+} __packed;
+
+/* 4KB-sized summary block structure */
+struct f2fs_summary_block {
+ struct f2fs_summary entries[ENTRIES_IN_SUM];
+ union {
+ __le16 n_nats;
+ __le16 n_sits;
+ };
+ /* spare area is used by NAT or SIT journals */
+ union {
+ struct nat_journal nat_j;
+ struct sit_journal sit_j;
+ };
+ struct summary_footer footer;
+} __packed;
+
+/*
+ * For directory operations
+ */
+#define F2FS_DOT_HASH 0
+#define F2FS_DDOT_HASH F2FS_DOT_HASH
+#define F2FS_MAX_HASH (~((0x3ULL) << 62))
+#define F2FS_HASH_COL_BIT ((0x1ULL) << 63)
+
+typedef __le32 f2fs_hash_t;
+
+/* One directory entry slot covers 8bytes-long file name */
+#define F2FS_SLOT_LEN 8
+#define F2FS_SLOT_LEN_BITS 3
+
+#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
+
+/* the number of dentry in a block */
+#define NR_DENTRY_IN_BLOCK 214
+
+/* MAX level for dir lookup */
+#define MAX_DIR_HASH_DEPTH 63
+
+/* MAX buckets in one level of dir */
+#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
+
+#define SIZE_OF_DIR_ENTRY 11 /* by byte */
+#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
+ BITS_PER_BYTE)
+#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+ F2FS_SLOT_LEN) * \
+ NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+
+/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */
+struct f2fs_dir_entry {
+ __le32 hash_code; /* hash code of file name */
+ __le32 ino; /* inode number */
+ __le16 name_len; /* lengh of file name */
+ __u8 file_type; /* file type */
+} __packed;
+
+/* 4KB-sized directory entry block */
+struct f2fs_dentry_block {
+ /* validity bitmap for directory entries in each block */
+ __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
+ __u8 reserved[SIZE_OF_RESERVED];
+ struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK];
+ __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN];
+} __packed;
+
+/* for inline dir */
+#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \
+ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
+ BITS_PER_BYTE + 1))
+#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \
+ BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \
+ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
+ NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE))
+
+/* inline directory entry structure */
+struct f2fs_inline_dentry {
+ __u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE];
+ __u8 reserved[INLINE_RESERVED_SIZE];
+ struct f2fs_dir_entry dentry[NR_INLINE_DENTRY];
+ __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN];
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+ F2FS_FT_UNKNOWN,
+ F2FS_FT_REG_FILE,
+ F2FS_FT_DIR,
+ F2FS_FT_CHRDEV,
+ F2FS_FT_BLKDEV,
+ F2FS_FT_FIFO,
+ F2FS_FT_SOCK,
+ F2FS_FT_SYMLINK,
+ F2FS_FT_MAX
+};
+
+#endif /* _LINUX_F2FS_FS_H */
diff --git a/include/linux/magic.h b/include/linux/magic.h
index e15192c..66353ff 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -23,6 +23,7 @@
#define EXT4_SUPER_MAGIC 0xEF53
#define BTRFS_SUPER_MAGIC 0x9123683E
#define NILFS_SUPER_MAGIC 0x3434
+#define F2FS_SUPER_MAGIC 0xF2F52010
#define HPFS_SUPER_MAGIC 0xf995e849
#define ISOFS_SUPER_MAGIC 0x9660
#define JFFS2_SUPER_MAGIC 0x72b6
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
new file mode 100644
index 0000000..b83cc02
--- /dev/null
+++ b/include/trace/events/f2fs.h
@@ -0,0 +1,1014 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM f2fs
+
+#if !defined(_TRACE_F2FS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_F2FS_H
+
+#include <linux/tracepoint.h>
+
+#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev)
+#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino
+
+#define show_block_type(type) \
+ __print_symbolic(type, \
+ { NODE, "NODE" }, \
+ { DATA, "DATA" }, \
+ { META, "META" }, \
+ { META_FLUSH, "META_FLUSH" })
+
+#define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA))
+#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO))
+
+#define show_bio_type(type) show_bio_base(type), show_bio_extra(type)
+
+#define show_bio_base(type) \
+ __print_symbolic(F2FS_BIO_MASK(type), \
+ { READ, "READ" }, \
+ { READA, "READAHEAD" }, \
+ { READ_SYNC, "READ_SYNC" }, \
+ { WRITE, "WRITE" }, \
+ { WRITE_SYNC, "WRITE_SYNC" }, \
+ { WRITE_FLUSH, "WRITE_FLUSH" }, \
+ { WRITE_FUA, "WRITE_FUA" }, \
+ { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" })
+
+#define show_bio_extra(type) \
+ __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \
+ { REQ_META, "(M)" }, \
+ { REQ_PRIO, "(P)" }, \
+ { REQ_META | REQ_PRIO, "(MP)" }, \
+ { 0, " \b" })
+
+#define show_data_type(type) \
+ __print_symbolic(type, \
+ { CURSEG_HOT_DATA, "Hot DATA" }, \
+ { CURSEG_WARM_DATA, "Warm DATA" }, \
+ { CURSEG_COLD_DATA, "Cold DATA" }, \
+ { CURSEG_HOT_NODE, "Hot NODE" }, \
+ { CURSEG_WARM_NODE, "Warm NODE" }, \
+ { CURSEG_COLD_NODE, "Cold NODE" }, \
+ { NO_CHECK_TYPE, "No TYPE" })
+
+#define show_file_type(type) \
+ __print_symbolic(type, \
+ { 0, "FILE" }, \
+ { 1, "DIR" })
+
+#define show_gc_type(type) \
+ __print_symbolic(type, \
+ { FG_GC, "Foreground GC" }, \
+ { BG_GC, "Background GC" })
+
+#define show_alloc_mode(type) \
+ __print_symbolic(type, \
+ { LFS, "LFS-mode" }, \
+ { SSR, "SSR-mode" })
+
+#define show_victim_policy(type) \
+ __print_symbolic(type, \
+ { GC_GREEDY, "Greedy" }, \
+ { GC_CB, "Cost-Benefit" })
+
+#define show_cpreason(type) \
+ __print_symbolic(type, \
+ { CP_UMOUNT, "Umount" }, \
+ { CP_FASTBOOT, "Fastboot" }, \
+ { CP_SYNC, "Sync" }, \
+ { CP_DISCARD, "Discard" })
+
+struct victim_sel_policy;
+
+DECLARE_EVENT_CLASS(f2fs__inode,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(ino_t, pino)
+ __field(umode_t, mode)
+ __field(loff_t, size)
+ __field(unsigned int, nlink)
+ __field(blkcnt_t, blocks)
+ __field(__u8, advise)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pino = F2FS_I(inode)->i_pino;
+ __entry->mode = inode->i_mode;
+ __entry->nlink = inode->i_nlink;
+ __entry->size = inode->i_size;
+ __entry->blocks = inode->i_blocks;
+ __entry->advise = F2FS_I(inode)->i_advise;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, pino = %lu, i_mode = 0x%hx, "
+ "i_size = %lld, i_nlink = %u, i_blocks = %llu, i_advise = 0x%x",
+ show_dev_ino(__entry),
+ (unsigned long)__entry->pino,
+ __entry->mode,
+ __entry->size,
+ (unsigned int)__entry->nlink,
+ (unsigned long long)__entry->blocks,
+ (unsigned char)__entry->advise)
+);
+
+DECLARE_EVENT_CLASS(f2fs__inode_exit,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, ret = %d",
+ show_dev_ino(__entry),
+ __entry->ret)
+);
+
+DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+TRACE_EVENT(f2fs_sync_file_exit,
+
+ TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret),
+
+ TP_ARGS(inode, need_cp, datasync, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, need_cp)
+ __field(int, datasync)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->need_cp = need_cp;
+ __entry->datasync = datasync;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, "
+ "datasync = %d, ret = %d",
+ show_dev_ino(__entry),
+ __entry->need_cp ? "needed" : "not needed",
+ __entry->datasync,
+ __entry->ret)
+);
+
+TRACE_EVENT(f2fs_sync_fs,
+
+ TP_PROTO(struct super_block *sb, int wait),
+
+ TP_ARGS(sb, wait),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, dirty)
+ __field(int, wait)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->dirty = is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY);
+ __entry->wait = wait;
+ ),
+
+ TP_printk("dev = (%d,%d), superblock is %s, wait = %d",
+ show_dev(__entry),
+ __entry->dirty ? "dirty" : "not dirty",
+ __entry->wait)
+);
+
+DEFINE_EVENT(f2fs__inode, f2fs_iget,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DEFINE_EVENT(f2fs__inode_exit, f2fs_iget_exit,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
+DEFINE_EVENT(f2fs__inode, f2fs_evict_inode,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
+TRACE_EVENT(f2fs_unlink_enter,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry),
+
+ TP_ARGS(dir, dentry),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, size)
+ __field(blkcnt_t, blocks)
+ __field(const char *, name)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->size = dir->i_size;
+ __entry->blocks = dir->i_blocks;
+ __entry->name = dentry->d_name.name;
+ ),
+
+ TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, "
+ "i_blocks = %llu, name = %s",
+ show_dev_ino(__entry),
+ __entry->size,
+ (unsigned long long)__entry->blocks,
+ __entry->name)
+);
+
+DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
+DEFINE_EVENT(f2fs__inode, f2fs_truncate,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+TRACE_EVENT(f2fs_truncate_data_blocks_range,
+
+ TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs, int free),
+
+ TP_ARGS(inode, nid, ofs, free),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(nid_t, nid)
+ __field(unsigned int, ofs)
+ __field(int, free)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->nid = nid;
+ __entry->ofs = ofs;
+ __entry->free = free;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, nid = %u, offset = %u, freed = %d",
+ show_dev_ino(__entry),
+ (unsigned int)__entry->nid,
+ __entry->ofs,
+ __entry->free)
+);
+
+DECLARE_EVENT_CLASS(f2fs__truncate_op,
+
+ TP_PROTO(struct inode *inode, u64 from),
+
+ TP_ARGS(inode, from),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, size)
+ __field(blkcnt_t, blocks)
+ __field(u64, from)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->size = inode->i_size;
+ __entry->blocks = inode->i_blocks;
+ __entry->from = from;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, i_size = %lld, i_blocks = %llu, "
+ "start file offset = %llu",
+ show_dev_ino(__entry),
+ __entry->size,
+ (unsigned long long)__entry->blocks,
+ (unsigned long long)__entry->from)
+);
+
+DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_blocks_enter,
+
+ TP_PROTO(struct inode *inode, u64 from),
+
+ TP_ARGS(inode, from)
+);
+
+DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_blocks_exit,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
+DEFINE_EVENT(f2fs__truncate_op, f2fs_truncate_inode_blocks_enter,
+
+ TP_PROTO(struct inode *inode, u64 from),
+
+ TP_ARGS(inode, from)
+);
+
+DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_inode_blocks_exit,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
+DECLARE_EVENT_CLASS(f2fs__truncate_node,
+
+ TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr),
+
+ TP_ARGS(inode, nid, blk_addr),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(nid_t, nid)
+ __field(block_t, blk_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->nid = nid;
+ __entry->blk_addr = blk_addr;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, nid = %u, block_address = 0x%llx",
+ show_dev_ino(__entry),
+ (unsigned int)__entry->nid,
+ (unsigned long long)__entry->blk_addr)
+);
+
+DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_nodes_enter,
+
+ TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr),
+
+ TP_ARGS(inode, nid, blk_addr)
+);
+
+DEFINE_EVENT(f2fs__inode_exit, f2fs_truncate_nodes_exit,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
+DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node,
+
+ TP_PROTO(struct inode *inode, nid_t nid, block_t blk_addr),
+
+ TP_ARGS(inode, nid, blk_addr)
+);
+
+TRACE_EVENT(f2fs_truncate_partial_nodes,
+
+ TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err),
+
+ TP_ARGS(inode, nid, depth, err),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(nid_t, nid[3])
+ __field(int, depth)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->nid[0] = nid[0];
+ __entry->nid[1] = nid[1];
+ __entry->nid[2] = nid[2];
+ __entry->depth = depth;
+ __entry->err = err;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, "
+ "nid[0] = %u, nid[1] = %u, nid[2] = %u, depth = %d, err = %d",
+ show_dev_ino(__entry),
+ (unsigned int)__entry->nid[0],
+ (unsigned int)__entry->nid[1],
+ (unsigned int)__entry->nid[2],
+ __entry->depth,
+ __entry->err)
+);
+
+TRACE_EVENT(f2fs_get_data_block,
+ TP_PROTO(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int ret),
+
+ TP_ARGS(inode, iblock, bh, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(sector_t, iblock)
+ __field(sector_t, bh_start)
+ __field(size_t, bh_size)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->iblock = iblock;
+ __entry->bh_start = bh->b_blocknr;
+ __entry->bh_size = bh->b_size;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
+ "start blkaddr = 0x%llx, len = 0x%llx bytes, err = %d",
+ show_dev_ino(__entry),
+ (unsigned long long)__entry->iblock,
+ (unsigned long long)__entry->bh_start,
+ (unsigned long long)__entry->bh_size,
+ __entry->ret)
+);
+
+TRACE_EVENT(f2fs_get_victim,
+
+ TP_PROTO(struct super_block *sb, int type, int gc_type,
+ struct victim_sel_policy *p, unsigned int pre_victim,
+ unsigned int prefree, unsigned int free),
+
+ TP_ARGS(sb, type, gc_type, p, pre_victim, prefree, free),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(int, gc_type)
+ __field(int, alloc_mode)
+ __field(int, gc_mode)
+ __field(unsigned int, victim)
+ __field(unsigned int, ofs_unit)
+ __field(unsigned int, pre_victim)
+ __field(unsigned int, prefree)
+ __field(unsigned int, free)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->type = type;
+ __entry->gc_type = gc_type;
+ __entry->alloc_mode = p->alloc_mode;
+ __entry->gc_mode = p->gc_mode;
+ __entry->victim = p->min_segno;
+ __entry->ofs_unit = p->ofs_unit;
+ __entry->pre_victim = pre_victim;
+ __entry->prefree = prefree;
+ __entry->free = free;
+ ),
+
+ TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u "
+ "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u",
+ show_dev(__entry),
+ show_data_type(__entry->type),
+ show_gc_type(__entry->gc_type),
+ show_alloc_mode(__entry->alloc_mode),
+ show_victim_policy(__entry->gc_mode),
+ __entry->victim,
+ __entry->ofs_unit,
+ (int)__entry->pre_victim,
+ __entry->prefree,
+ __entry->free)
+);
+
+TRACE_EVENT(f2fs_fallocate,
+
+ TP_PROTO(struct inode *inode, int mode,
+ loff_t offset, loff_t len, int ret),
+
+ TP_ARGS(inode, mode, offset, len, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, mode)
+ __field(loff_t, offset)
+ __field(loff_t, len)
+ __field(loff_t, size)
+ __field(blkcnt_t, blocks)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->mode = mode;
+ __entry->offset = offset;
+ __entry->len = len;
+ __entry->size = inode->i_size;
+ __entry->blocks = inode->i_blocks;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, mode = %x, offset = %lld, "
+ "len = %lld, i_size = %lld, i_blocks = %llu, ret = %d",
+ show_dev_ino(__entry),
+ __entry->mode,
+ (unsigned long long)__entry->offset,
+ (unsigned long long)__entry->len,
+ (unsigned long long)__entry->size,
+ (unsigned long long)__entry->blocks,
+ __entry->ret)
+);
+
+TRACE_EVENT(f2fs_direct_IO_enter,
+
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+ TP_ARGS(inode, offset, len, rw),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned long, len)
+ __field(int, rw)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d",
+ show_dev_ino(__entry),
+ __entry->pos,
+ __entry->len,
+ __entry->rw)
+);
+
+TRACE_EVENT(f2fs_direct_IO_exit,
+
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+ int rw, int ret),
+
+ TP_ARGS(inode, offset, len, rw, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned long, len)
+ __field(int, rw)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu "
+ "rw = %d ret = %d",
+ show_dev_ino(__entry),
+ __entry->pos,
+ __entry->len,
+ __entry->rw,
+ __entry->ret)
+);
+
+TRACE_EVENT(f2fs_reserve_new_block,
+
+ TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node),
+
+ TP_ARGS(inode, nid, ofs_in_node),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(nid_t, nid)
+ __field(unsigned int, ofs_in_node)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->nid = nid;
+ __entry->ofs_in_node = ofs_in_node;
+ ),
+
+ TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u",
+ show_dev(__entry),
+ (unsigned int)__entry->nid,
+ __entry->ofs_in_node)
+);
+
+DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
+
+ TP_PROTO(struct page *page, struct f2fs_io_info *fio),
+
+ TP_ARGS(page, fio),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(pgoff_t, index)
+ __field(block_t, blkaddr)
+ __field(int, rw)
+ __field(int, type)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ __entry->ino = page->mapping->host->i_ino;
+ __entry->index = page->index;
+ __entry->blkaddr = fio->blk_addr;
+ __entry->rw = fio->rw;
+ __entry->type = fio->type;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
+ "blkaddr = 0x%llx, rw = %s%s, type = %s",
+ show_dev_ino(__entry),
+ (unsigned long)__entry->index,
+ (unsigned long long)__entry->blkaddr,
+ show_bio_type(__entry->rw),
+ show_block_type(__entry->type))
+);
+
+DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio,
+
+ TP_PROTO(struct page *page, struct f2fs_io_info *fio),
+
+ TP_ARGS(page, fio),
+
+ TP_CONDITION(page->mapping)
+);
+
+DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
+
+ TP_PROTO(struct page *page, struct f2fs_io_info *fio),
+
+ TP_ARGS(page, fio),
+
+ TP_CONDITION(page->mapping)
+);
+
+DECLARE_EVENT_CLASS(f2fs__submit_bio,
+
+ TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
+ struct bio *bio),
+
+ TP_ARGS(sb, fio, bio),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, rw)
+ __field(int, type)
+ __field(sector_t, sector)
+ __field(unsigned int, size)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->rw = fio->rw;
+ __entry->type = fio->type;
+ __entry->sector = bio->bi_sector;
+ __entry->size = bio->bi_size;
+ ),
+
+ TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u",
+ show_dev(__entry),
+ show_bio_type(__entry->rw),
+ show_block_type(__entry->type),
+ (unsigned long long)__entry->sector,
+ __entry->size)
+);
+
+DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio,
+
+ TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
+ struct bio *bio),
+
+ TP_ARGS(sb, fio, bio),
+
+ TP_CONDITION(bio)
+);
+
+DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
+
+ TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
+ struct bio *bio),
+
+ TP_ARGS(sb, fio, bio),
+
+ TP_CONDITION(bio)
+);
+
+TRACE_EVENT(f2fs_write_begin,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int flags),
+
+ TP_ARGS(inode, pos, len, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned int, len)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->len = len;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u",
+ show_dev_ino(__entry),
+ (unsigned long long)__entry->pos,
+ __entry->len,
+ __entry->flags)
+);
+
+TRACE_EVENT(f2fs_write_end,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int copied),
+
+ TP_ARGS(inode, pos, len, copied),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned int, len)
+ __field(unsigned int, copied)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->len = len;
+ __entry->copied = copied;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u",
+ show_dev_ino(__entry),
+ (unsigned long long)__entry->pos,
+ __entry->len,
+ __entry->copied)
+);
+
+DECLARE_EVENT_CLASS(f2fs__page,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, type)
+ __field(int, dir)
+ __field(pgoff_t, index)
+ __field(int, dirty)
+ __field(int, uptodate)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ __entry->ino = page->mapping->host->i_ino;
+ __entry->type = type;
+ __entry->dir = S_ISDIR(page->mapping->host->i_mode);
+ __entry->index = page->index;
+ __entry->dirty = PageDirty(page);
+ __entry->uptodate = PageUptodate(page);
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, "
+ "dirty = %d, uptodate = %d",
+ show_dev_ino(__entry),
+ show_block_type(__entry->type),
+ show_file_type(__entry->dir),
+ (unsigned long)__entry->index,
+ __entry->dirty,
+ __entry->uptodate)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_writepage,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_readpage,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type)
+);
+
+TRACE_EVENT(f2fs_writepages,
+
+ TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type),
+
+ TP_ARGS(inode, wbc, type),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, type)
+ __field(int, dir)
+ __field(long, nr_to_write)
+ __field(long, pages_skipped)
+ __field(loff_t, range_start)
+ __field(loff_t, range_end)
+ __field(pgoff_t, writeback_index)
+ __field(int, sync_mode)
+ __field(char, for_kupdate)
+ __field(char, for_background)
+ __field(char, tagged_writepages)
+ __field(char, for_reclaim)
+ __field(char, range_cyclic)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->type = type;
+ __entry->dir = S_ISDIR(inode->i_mode);
+ __entry->nr_to_write = wbc->nr_to_write;
+ __entry->pages_skipped = wbc->pages_skipped;
+ __entry->range_start = wbc->range_start;
+ __entry->range_end = wbc->range_end;
+ __entry->writeback_index = inode->i_mapping->writeback_index;
+ __entry->sync_mode = wbc->sync_mode;
+ __entry->for_kupdate = wbc->for_kupdate;
+ __entry->for_background = wbc->for_background;
+ __entry->tagged_writepages = wbc->tagged_writepages;
+ __entry->for_reclaim = wbc->for_reclaim;
+ __entry->range_cyclic = wbc->range_cyclic;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, "
+ "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, "
+ "kupdate %u background %u tagged %u reclaim %u cyclic %u",
+ show_dev_ino(__entry),
+ show_block_type(__entry->type),
+ show_file_type(__entry->dir),
+ __entry->nr_to_write,
+ __entry->pages_skipped,
+ __entry->range_start,
+ __entry->range_end,
+ (unsigned long)__entry->writeback_index,
+ __entry->sync_mode,
+ __entry->for_kupdate,
+ __entry->for_background,
+ __entry->tagged_writepages,
+ __entry->for_reclaim,
+ __entry->range_cyclic)
+);
+
+TRACE_EVENT(f2fs_write_checkpoint,
+
+ TP_PROTO(struct super_block *sb, int reason, char *msg),
+
+ TP_ARGS(sb, reason, msg),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, reason)
+ __field(char *, msg)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->reason = reason;
+ __entry->msg = msg;
+ ),
+
+ TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
+ show_dev(__entry),
+ show_cpreason(__entry->reason),
+ __entry->msg)
+);
+
+TRACE_EVENT(f2fs_issue_discard,
+
+ TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen),
+
+ TP_ARGS(sb, blkstart, blklen),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(block_t, blkstart)
+ __field(block_t, blklen)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->blkstart = blkstart;
+ __entry->blklen = blklen;
+ ),
+
+ TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx",
+ show_dev(__entry),
+ (unsigned long long)__entry->blkstart,
+ (unsigned long long)__entry->blklen)
+);
+
+TRACE_EVENT(f2fs_issue_flush,
+
+ TP_PROTO(struct super_block *sb, unsigned int nobarrier,
+ unsigned int flush_merge),
+
+ TP_ARGS(sb, nobarrier, flush_merge),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, nobarrier)
+ __field(unsigned int, flush_merge)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->nobarrier = nobarrier;
+ __entry->flush_merge = flush_merge;
+ ),
+
+ TP_printk("dev = (%d,%d), %s %s",
+ show_dev(__entry),
+ __entry->nobarrier ? "skip (nobarrier)" : "issue",
+ __entry->flush_merge ? " with flush_merge" : "")
+);
+#endif /* _TRACE_F2FS_H */
+
+ /* This part must be outside protection */
+#include <trace/define_trace.h>