/*
  Copyright (c) 2007-2009 Gluster, Inc. <http://www.gluster.com>
  This file is part of GlusterFS.

  GlusterFS is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 3 of the License,
  or (at your option) any later version.

  GlusterFS is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see
  <http://www.gnu.org/licenses/>.
*/

#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif

#include <inttypes.h>

#include "md5.h"
#include "call-stub.h"


static call_stub_t *
stub_new (call_frame_t *frame,
	  char wind,
	  glusterfs_fop_t fop)
{
	call_stub_t *new = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	new = CALLOC (1, sizeof (*new));
	GF_VALIDATE_OR_GOTO ("call-stub", new, out);

	new->frame = frame;
	new->wind = wind;
	new->fop = fop;

	INIT_LIST_HEAD (&new->list);
out:
	return new;
}
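
/* Every fop has a pair of constructors: fop_XXX_stub() captures the
 * arguments of a call on its way down the translator stack, and
 * fop_XXX_cbk_stub() captures the results on the way back up.
 * Reference-counted objects (inodes, fds, dicts, iobrefs) are ref'd
 * and strings duplicated so the stub remains valid after the caller's
 * copies are released. */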


call_stub_t *
fop_lookup_stub (call_frame_t *frame,
		 fop_lookup_t fn,
		 loc_t *loc,
		 dict_t *xattr_req)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_LOOKUP);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.lookup.fn = fn;

	if (xattr_req)
		stub->args.lookup.xattr_req = dict_ref (xattr_req);

	loc_copy (&stub->args.lookup.loc, loc);
out:
	return stub;
}


call_stub_t *
fop_lookup_cbk_stub (call_frame_t *frame,
		     fop_lookup_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno,
		     inode_t *inode,
		     struct stat *buf,
                     dict_t *dict,
                     struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_LOOKUP);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.lookup_cbk.fn = fn;
	stub->args.lookup_cbk.op_ret = op_ret;
	stub->args.lookup_cbk.op_errno = op_errno;
	if (inode)
		stub->args.lookup_cbk.inode = inode_ref (inode);
	if (buf)
		stub->args.lookup_cbk.buf = *buf;
	if (dict)
		stub->args.lookup_cbk.dict = dict_ref (dict);
        if (postparent)
                stub->args.lookup_cbk.postparent = *postparent;
out:
	return stub;
}



call_stub_t *
fop_stat_stub (call_frame_t *frame,
	       fop_stat_t fn,
	       loc_t *loc)
{
	call_stub_t *stub = NULL;
  
	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_STAT);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.stat.fn = fn;
	loc_copy (&stub->args.stat.loc, loc);
out:
	return stub;
}


call_stub_t *
fop_stat_cbk_stub (call_frame_t *frame,
		   fop_stat_cbk_t fn,
		   int32_t op_ret,
		   int32_t op_errno,
		   struct stat *buf)
{
	call_stub_t *stub = NULL;
	
	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_STAT);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.stat_cbk.fn = fn;
	stub->args.stat_cbk.op_ret = op_ret;
	stub->args.stat_cbk.op_errno = op_errno;
	if (op_ret == 0)
		stub->args.stat_cbk.buf = *buf;
out:
	return stub;
}


call_stub_t *
fop_fstat_stub (call_frame_t *frame,
		fop_fstat_t fn,
		fd_t *fd)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_FSTAT);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fstat.fn = fn;

	if (fd)
		stub->args.fstat.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_fstat_cbk_stub (call_frame_t *frame,
		    fop_fstat_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno,
		    struct stat *buf)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FSTAT);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fstat_cbk.fn = fn;
	stub->args.fstat_cbk.op_ret = op_ret;
	stub->args.fstat_cbk.op_errno = op_errno;
	if (buf)
		stub->args.fstat_cbk.buf = *buf;
out:
	return stub;
}


/* truncate */

call_stub_t *
fop_truncate_stub (call_frame_t *frame,
		   fop_truncate_t fn,
		   loc_t *loc,
		   off_t off)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);	
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_TRUNCATE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.truncate.fn = fn;
	loc_copy (&stub->args.truncate.loc, loc);
	stub->args.truncate.off = off;
out:
	return stub;
}


call_stub_t *
fop_truncate_cbk_stub (call_frame_t *frame,
		       fop_truncate_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       struct stat *prebuf,
                       struct stat *postbuf)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_TRUNCATE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.truncate_cbk.fn = fn;
	stub->args.truncate_cbk.op_ret = op_ret;
	stub->args.truncate_cbk.op_errno = op_errno;
	if (prebuf)
		stub->args.truncate_cbk.prebuf = *prebuf;
        if (postbuf)
                stub->args.truncate_cbk.postbuf = *postbuf;
out:
	return stub;
}


call_stub_t *
fop_ftruncate_stub (call_frame_t *frame,
		    fop_ftruncate_t fn,
		    fd_t *fd,
		    off_t off)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_FTRUNCATE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.ftruncate.fn = fn;
	if (fd)
		stub->args.ftruncate.fd = fd_ref (fd);

	stub->args.ftruncate.off = off;
out:
	return stub;
}


call_stub_t *
fop_ftruncate_cbk_stub (call_frame_t *frame,
			fop_ftruncate_cbk_t fn,
			int32_t op_ret,
			int32_t op_errno,
			struct stat *prebuf,
                        struct stat *postbuf)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FTRUNCATE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.ftruncate_cbk.fn = fn;
	stub->args.ftruncate_cbk.op_ret = op_ret;
	stub->args.ftruncate_cbk.op_errno = op_errno;
	if (prebuf)
		stub->args.ftruncate_cbk.prebuf = *prebuf;
	if (postbuf)
		stub->args.ftruncate_cbk.postbuf = *postbuf;
out:
	return stub;
}


call_stub_t *
fop_access_stub (call_frame_t *frame,
		 fop_access_t fn,
		 loc_t *loc,
		 int32_t mask)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_ACCESS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.access.fn = fn;
	loc_copy (&stub->args.access.loc, loc);
	stub->args.access.mask = mask;
out:
	return stub;
}


call_stub_t *
fop_access_cbk_stub (call_frame_t *frame,
		     fop_access_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_ACCESS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.access_cbk.fn = fn;
	stub->args.access_cbk.op_ret = op_ret;
	stub->args.access_cbk.op_errno = op_errno;
out:
	return stub;
}


call_stub_t *
fop_readlink_stub (call_frame_t *frame,
		   fop_readlink_t fn,
		   loc_t *loc,
		   size_t size)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
	
	stub = stub_new (frame, 1, GF_FOP_READLINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readlink.fn = fn;
	loc_copy (&stub->args.readlink.loc, loc);
	stub->args.readlink.size = size;
out:
	return stub;
}


call_stub_t *
fop_readlink_cbk_stub (call_frame_t *frame,
		       fop_readlink_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       const char *path,
                       struct stat *sbuf)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_READLINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readlink_cbk.fn = fn;
	stub->args.readlink_cbk.op_ret = op_ret;
	stub->args.readlink_cbk.op_errno = op_errno;
	if (path)
		stub->args.readlink_cbk.buf = strdup (path);
        if (sbuf)
                stub->args.readlink_cbk.sbuf = *sbuf;
out:
	return stub;
}


call_stub_t *
fop_mknod_stub (call_frame_t *frame,
		fop_mknod_t fn,
		loc_t *loc,
		mode_t mode,
		dev_t rdev)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_MKNOD);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.mknod.fn = fn;
	loc_copy (&stub->args.mknod.loc, loc);
	stub->args.mknod.mode = mode;
	stub->args.mknod.rdev = rdev;
out:
	return stub;
}


call_stub_t *
fop_mknod_cbk_stub (call_frame_t *frame,
		    fop_mknod_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno,
		    inode_t *inode,
                    struct stat *buf,
                    struct stat *preparent,
                    struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_MKNOD);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.mknod_cbk.fn = fn;
	stub->args.mknod_cbk.op_ret = op_ret;
	stub->args.mknod_cbk.op_errno = op_errno;
	if (inode)
		stub->args.mknod_cbk.inode = inode_ref (inode);
	if (buf)
		stub->args.mknod_cbk.buf = *buf;
        if (preparent)
                stub->args.mknod_cbk.preparent = *preparent;
        if (postparent)
                stub->args.mknod_cbk.postparent = *postparent;
out:
	return stub;
}


call_stub_t *
fop_mkdir_stub (call_frame_t *frame,
		fop_mkdir_t fn,
		loc_t *loc,
		mode_t mode)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_MKDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.mkdir.fn = fn;
	loc_copy (&stub->args.mkdir.loc, loc);
	stub->args.mkdir.mode = mode;
out:
	return stub;
}


call_stub_t *
fop_mkdir_cbk_stub (call_frame_t *frame,
		    fop_mkdir_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno,
		    inode_t *inode,
                    struct stat *buf,
                    struct stat *preparent,
                    struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_MKDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.mkdir_cbk.fn = fn;
	stub->args.mkdir_cbk.op_ret = op_ret;
	stub->args.mkdir_cbk.op_errno = op_errno;
	if (inode)
		stub->args.mkdir_cbk.inode = inode_ref (inode);
	if (buf)
		stub->args.mkdir_cbk.buf = *buf;
        if (preparent)
                stub->args.mkdir_cbk.preparent = *preparent;
        if (postparent)
                stub->args.mkdir_cbk.postparent = *postparent;
out:
	return stub;
}


call_stub_t *
fop_unlink_stub (call_frame_t *frame,
		 fop_unlink_t fn,
		 loc_t *loc)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_UNLINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.unlink.fn = fn;
	loc_copy (&stub->args.unlink.loc, loc);
out:
	return stub;
}


call_stub_t *
fop_unlink_cbk_stub (call_frame_t *frame,
		     fop_unlink_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno,
                     struct stat *preparent,
                     struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_UNLINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.unlink_cbk.fn = fn;
	stub->args.unlink_cbk.op_ret = op_ret;
	stub->args.unlink_cbk.op_errno = op_errno;
        if (preparent)
                stub->args.unlink_cbk.preparent = *preparent;
        if (postparent)
                stub->args.unlink_cbk.postparent = *postparent;
out:
	return stub;
}



call_stub_t *
fop_rmdir_stub (call_frame_t *frame,
		fop_rmdir_t fn,
		loc_t *loc)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_RMDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.rmdir.fn = fn;
	loc_copy (&stub->args.rmdir.loc, loc);
out:
	return stub;
}


call_stub_t *
fop_rmdir_cbk_stub (call_frame_t *frame,
		    fop_rmdir_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno,
                    struct stat *preparent,
                    struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_RMDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.rmdir_cbk.fn = fn;
	stub->args.rmdir_cbk.op_ret = op_ret;
	stub->args.rmdir_cbk.op_errno = op_errno;
        if (preparent)
                stub->args.rmdir_cbk.preparent = *preparent;
        if (postparent)
                stub->args.rmdir_cbk.postparent = *postparent;
out:
	return stub;
}


call_stub_t *
fop_symlink_stub (call_frame_t *frame,
		  fop_symlink_t fn,
		  const char *linkname,
		  loc_t *loc)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
	GF_VALIDATE_OR_GOTO ("call-stub", linkname, out);

	stub = stub_new (frame, 1, GF_FOP_SYMLINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.symlink.fn = fn;
	stub->args.symlink.linkname = strdup (linkname);
	loc_copy (&stub->args.symlink.loc, loc);
out:
	return stub;
}


call_stub_t *
fop_symlink_cbk_stub (call_frame_t *frame,
		      fop_symlink_cbk_t fn,
		      int32_t op_ret,
		      int32_t op_errno,
		      inode_t *inode,
                      struct stat *buf,
                      struct stat *preparent,
                      struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_SYMLINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.symlink_cbk.fn = fn;
	stub->args.symlink_cbk.op_ret = op_ret;
	stub->args.symlink_cbk.op_errno = op_errno;
	if (inode)
		stub->args.symlink_cbk.inode = inode_ref (inode);
	if (buf)
		stub->args.symlink_cbk.buf = *buf;
        if (preparent)
                stub->args.symlink_cbk.preparent = *preparent;
        if (postparent)
                stub->args.symlink_cbk.postparent = *postparent;
out:
	return stub;
}


call_stub_t *
fop_rename_stub (call_frame_t *frame,
		 fop_rename_t fn,
		 loc_t *oldloc,
		 loc_t *newloc)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
	GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);

	stub = stub_new (frame, 1, GF_FOP_RENAME);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.rename.fn = fn;
	loc_copy (&stub->args.rename.old, oldloc);
	loc_copy (&stub->args.rename.new, newloc);
out:
	return stub;
}


call_stub_t *
fop_rename_cbk_stub (call_frame_t *frame,
		     fop_rename_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno,
		     struct stat *buf,
                     struct stat *preoldparent,
                     struct stat *postoldparent,
                     struct stat *prenewparent,
                     struct stat *postnewparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_RENAME);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.rename_cbk.fn = fn;
	stub->args.rename_cbk.op_ret = op_ret;
	stub->args.rename_cbk.op_errno = op_errno;
	if (buf)
		stub->args.rename_cbk.buf = *buf;
        if (preoldparent)
                stub->args.rename_cbk.preoldparent = *preoldparent;
        if (postoldparent)
                stub->args.rename_cbk.postoldparent = *postoldparent;
        if (prenewparent)
                stub->args.rename_cbk.prenewparent = *prenewparent;
        if (postnewparent)
                stub->args.rename_cbk.postnewparent = *postnewparent;
out:
	return stub;
}


call_stub_t *
fop_link_stub (call_frame_t *frame,
	       fop_link_t fn,
	       loc_t *oldloc,
	       loc_t *newloc)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
	GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);

	stub = stub_new (frame, 1, GF_FOP_LINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.link.fn = fn;
	loc_copy (&stub->args.link.oldloc, oldloc);
	loc_copy (&stub->args.link.newloc, newloc);

out:
	return stub;
}


call_stub_t *
fop_link_cbk_stub (call_frame_t *frame,
		   fop_link_cbk_t fn,
		   int32_t op_ret,
		   int32_t op_errno,
		   inode_t *inode,
                   struct stat *buf,
                   struct stat *preparent,
                   struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_LINK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.link_cbk.fn = fn;
	stub->args.link_cbk.op_ret = op_ret;
	stub->args.link_cbk.op_errno = op_errno;
	if (inode)
		stub->args.link_cbk.inode = inode_ref (inode);
	if (buf)
		stub->args.link_cbk.buf = *buf;
        if (preparent)
                stub->args.link_cbk.preparent = *preparent;
        if (postparent)
                stub->args.link_cbk.postparent = *postparent;
out:
	return stub;
}


call_stub_t *
fop_create_stub (call_frame_t *frame,
		 fop_create_t fn,
		 loc_t *loc,
		 int32_t flags,
		 mode_t mode, fd_t *fd)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_CREATE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.create.fn = fn;
	loc_copy (&stub->args.create.loc, loc);
	stub->args.create.flags = flags;
	stub->args.create.mode = mode;
	if (fd)
		stub->args.create.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_create_cbk_stub (call_frame_t *frame,
		     fop_create_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno,
		     fd_t *fd,
		     inode_t *inode,
		     struct stat *buf,
                     struct stat *preparent,
                     struct stat *postparent)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_CREATE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.create_cbk.fn = fn;
	stub->args.create_cbk.op_ret = op_ret;
	stub->args.create_cbk.op_errno = op_errno;
	if (fd)
		stub->args.create_cbk.fd = fd_ref (fd);
	if (inode)
		stub->args.create_cbk.inode = inode_ref (inode);
	if (buf)
		stub->args.create_cbk.buf = *buf;
        if (preparent)
                stub->args.create_cbk.preparent = *preparent;
        if (postparent)
                stub->args.create_cbk.postparent = *postparent;
out:
	return stub;
}


call_stub_t *
fop_open_stub (call_frame_t *frame,
	       fop_open_t fn,
	       loc_t *loc,
	       int32_t flags, fd_t *fd,
               int32_t wbflags)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_OPEN);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.open.fn = fn;
	loc_copy (&stub->args.open.loc, loc);
	stub->args.open.flags = flags;
        stub->args.open.wbflags = wbflags;
	if (fd)
		stub->args.open.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_open_cbk_stub (call_frame_t *frame,
		   fop_open_cbk_t fn,
		   int32_t op_ret,
		   int32_t op_errno,
		   fd_t *fd)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_OPEN);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.open_cbk.fn = fn;
	stub->args.open_cbk.op_ret = op_ret;
	stub->args.open_cbk.op_errno = op_errno;
	if (fd)
		stub->args.open_cbk.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_readv_stub (call_frame_t *frame,
		fop_readv_t fn,
		fd_t *fd,
		size_t size,
		off_t off)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_READ);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readv.fn = fn;
	if (fd)
		stub->args.readv.fd = fd_ref (fd);
	stub->args.readv.size = size;
	stub->args.readv.off = off;
out:
	return stub;
}


call_stub_t *
fop_readv_cbk_stub (call_frame_t *frame,
		    fop_readv_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno,
		    struct iovec *vector,
		    int32_t count,
		    struct stat *stbuf,
                    struct iobref *iobref)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_READ);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readv_cbk.fn = fn;
	stub->args.readv_cbk.op_ret = op_ret;
	stub->args.readv_cbk.op_errno = op_errno;
	if (op_ret >= 0) {
		stub->args.readv_cbk.vector = iov_dup (vector, count);
		stub->args.readv_cbk.count = count;
		stub->args.readv_cbk.stbuf = *stbuf;
		stub->args.readv_cbk.iobref = iobref_ref (iobref);
	}
out:
	return stub;
}


call_stub_t *
fop_writev_stub (call_frame_t *frame,
		 fop_writev_t fn,
		 fd_t *fd,
		 struct iovec *vector,
		 int32_t count,
		 off_t off,
                 struct iobref *iobref)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", vector, out);

	stub = stub_new (frame, 1, GF_FOP_WRITE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.writev.fn = fn;
	if (fd)
		stub->args.writev.fd = fd_ref (fd);
	stub->args.writev.vector = iov_dup (vector, count);
	stub->args.writev.count = count;
	stub->args.writev.off = off;
        stub->args.writev.iobref = iobref_ref (iobref);
out:
	return stub;
}


call_stub_t *
fop_writev_cbk_stub (call_frame_t *frame,
		     fop_writev_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno,
                     struct stat *prebuf,
		     struct stat *postbuf)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_WRITE);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.writev_cbk.fn = fn;
	stub->args.writev_cbk.op_ret = op_ret;
	stub->args.writev_cbk.op_errno = op_errno;
	if (op_ret >= 0)
		stub->args.writev_cbk.postbuf = *postbuf;
        if (prebuf)
                stub->args.writev_cbk.prebuf = *prebuf;
out:
	return stub;
}



call_stub_t *
fop_flush_stub (call_frame_t *frame,
		fop_flush_t fn,
		fd_t *fd)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_FLUSH);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.flush.fn = fn;
	if (fd)
		stub->args.flush.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_flush_cbk_stub (call_frame_t *frame,
		    fop_flush_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FLUSH);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.flush_cbk.fn = fn;
	stub->args.flush_cbk.op_ret = op_ret;
	stub->args.flush_cbk.op_errno = op_errno;
out:
	return stub;
}




call_stub_t *
fop_fsync_stub (call_frame_t *frame,
		fop_fsync_t fn,
		fd_t *fd,
		int32_t datasync)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_FSYNC);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fsync.fn = fn;
	if (fd)
		stub->args.fsync.fd = fd_ref (fd);
	stub->args.fsync.datasync = datasync;
out:
	return stub;
}


call_stub_t *
fop_fsync_cbk_stub (call_frame_t *frame,
		    fop_fsync_cbk_t fn,
		    int32_t op_ret,
		    int32_t op_errno,
                    struct stat *prebuf,
                    struct stat *postbuf)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FSYNC);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fsync_cbk.fn = fn;
	stub->args.fsync_cbk.op_ret = op_ret;
	stub->args.fsync_cbk.op_errno = op_errno;
        if (prebuf)
                stub->args.fsync_cbk.prebuf = *prebuf;
        if (postbuf)
                stub->args.fsync_cbk.postbuf = *postbuf;
out:
	return stub;
}


call_stub_t *
fop_opendir_stub (call_frame_t *frame,
		  fop_opendir_t fn,
		  loc_t *loc, fd_t *fd)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_OPENDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.opendir.fn = fn;
	loc_copy (&stub->args.opendir.loc, loc);
	if (fd)
		stub->args.opendir.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_opendir_cbk_stub (call_frame_t *frame,
		      fop_opendir_cbk_t fn,
		      int32_t op_ret,
		      int32_t op_errno,
		      fd_t *fd)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_OPENDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.opendir_cbk.fn = fn;
	stub->args.opendir_cbk.op_ret = op_ret;
	stub->args.opendir_cbk.op_errno = op_errno;

	if (fd)
		stub->args.opendir_cbk.fd = fd_ref (fd);
out:
	return stub;
}


call_stub_t *
fop_getdents_stub (call_frame_t *frame,
		   fop_getdents_t fn,
		   fd_t *fd,
		   size_t size,
		   off_t off,
		   int32_t flag)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_GETDENTS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.getdents.fn = fn;
	stub->args.getdents.size = size;
	stub->args.getdents.off = off;
	if (fd)
		stub->args.getdents.fd = fd_ref (fd);
	stub->args.getdents.flag = flag;
out:
	return stub;
}


call_stub_t *
fop_getdents_cbk_stub (call_frame_t *frame,
		       fop_getdents_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       dir_entry_t *entries,
		       int32_t count)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_GETDENTS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.getdents_cbk.fn = fn;
	stub->args.getdents_cbk.op_ret = op_ret;
	stub->args.getdents_cbk.op_errno = op_errno;
	if (op_ret >= 0) {
		stub->args.getdents_cbk.entries.next = entries->next;
		/* FIXME: are entries not needed in the caller after
		 * creating stub? */
		entries->next = NULL;
	}

	stub->args.getdents_cbk.count = count;
out:
	return stub;
}



call_stub_t *
fop_fsyncdir_stub (call_frame_t *frame,
		   fop_fsyncdir_t fn,
		   fd_t *fd,
		   int32_t datasync)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_FSYNCDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fsyncdir.fn = fn;
	if (fd)
		stub->args.fsyncdir.fd = fd_ref (fd);
	stub->args.fsyncdir.datasync = datasync;
out:
	return stub;
}


call_stub_t *
fop_fsyncdir_cbk_stub (call_frame_t *frame,
		       fop_fsyncdir_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FSYNCDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fsyncdir_cbk.fn = fn;
	stub->args.fsyncdir_cbk.op_ret = op_ret;
	stub->args.fsyncdir_cbk.op_errno = op_errno;
out:
	return stub;
}


call_stub_t *
fop_statfs_stub (call_frame_t *frame,
		 fop_statfs_t fn,
		 loc_t *loc)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out); 

	stub = stub_new (frame, 1, GF_FOP_STATFS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.statfs.fn = fn;
	loc_copy (&stub->args.statfs.loc, loc);
out:
	return stub;
}


call_stub_t *
fop_statfs_cbk_stub (call_frame_t *frame,
		     fop_statfs_cbk_t fn,
		     int32_t op_ret,
		     int32_t op_errno,
		     struct statvfs *buf)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_STATFS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.statfs_cbk.fn = fn;
	stub->args.statfs_cbk.op_ret = op_ret;
	stub->args.statfs_cbk.op_errno = op_errno;
	if (op_ret == 0)
		stub->args.statfs_cbk.buf = *buf;
out:
	return stub;
}


call_stub_t *
fop_setxattr_stub (call_frame_t *frame,
		   fop_setxattr_t fn,
		   loc_t *loc,
		   dict_t *dict,
		   int32_t flags)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_SETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.setxattr.fn = fn;
	loc_copy (&stub->args.setxattr.loc, loc);
	/* TODO */
	if (dict)
		stub->args.setxattr.dict = dict_ref (dict);
	stub->args.setxattr.flags = flags;
out:
	return stub;
}


call_stub_t *
fop_setxattr_cbk_stub (call_frame_t *frame,
		       fop_setxattr_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_SETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.setxattr_cbk.fn = fn;
	stub->args.setxattr_cbk.op_ret = op_ret;
	stub->args.setxattr_cbk.op_errno = op_errno;
out:
	return stub;
}

call_stub_t *
fop_getxattr_stub (call_frame_t *frame,
		   fop_getxattr_t fn,
		   loc_t *loc,
		   const char *name)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_GETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.getxattr.fn = fn;
	loc_copy (&stub->args.getxattr.loc, loc);

	if (name)
	        stub->args.getxattr.name = strdup (name);
out:
	return stub;
}


call_stub_t *
fop_getxattr_cbk_stub (call_frame_t *frame,
		       fop_getxattr_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       dict_t *dict)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_GETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.getxattr_cbk.fn = fn;
	stub->args.getxattr_cbk.op_ret = op_ret;
	stub->args.getxattr_cbk.op_errno = op_errno;
	/* TODO */
	if (dict)
		stub->args.getxattr_cbk.dict = dict_ref (dict);
out:
	return stub;
}


call_stub_t *
fop_fsetxattr_stub (call_frame_t *frame,
                    fop_fsetxattr_t fn,
                    fd_t *fd,
                    dict_t *dict,
                    int32_t flags)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", fd, out);

	stub = stub_new (frame, 1, GF_FOP_FSETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fsetxattr.fn = fn;
	stub->args.fsetxattr.fd = fd_ref (fd);

	/* TODO */
	if (dict)
		stub->args.fsetxattr.dict = dict_ref (dict);
	stub->args.fsetxattr.flags = flags;
out:
	return stub;
}


call_stub_t *
fop_fsetxattr_cbk_stub (call_frame_t *frame,
                        fop_fsetxattr_cbk_t fn,
                        int32_t op_ret,
                        int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FSETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fsetxattr_cbk.fn = fn;
	stub->args.fsetxattr_cbk.op_ret = op_ret;
	stub->args.fsetxattr_cbk.op_errno = op_errno;
out:
	return stub;
}


call_stub_t *
fop_fgetxattr_stub (call_frame_t *frame,
                    fop_fgetxattr_t fn,
                    fd_t *fd,
                    const char *name)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", fd, out);

	stub = stub_new (frame, 1, GF_FOP_FGETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fgetxattr.fn = fn;
	stub->args.fgetxattr.fd = fd_ref (fd);

	if (name)
	        stub->args.fgetxattr.name = strdup (name);
out:
	return stub;
}


call_stub_t *
fop_fgetxattr_cbk_stub (call_frame_t *frame,
                        fop_fgetxattr_cbk_t fn,
                        int32_t op_ret,
                        int32_t op_errno,
                        dict_t *dict)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FGETXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fgetxattr_cbk.fn = fn;
	stub->args.fgetxattr_cbk.op_ret = op_ret;
	stub->args.fgetxattr_cbk.op_errno = op_errno;

	/* TODO */
	if (dict)
		stub->args.fgetxattr_cbk.dict = dict_ref (dict);
out:
	return stub;
}


call_stub_t *
fop_removexattr_stub (call_frame_t *frame,
		      fop_removexattr_t fn,
		      loc_t *loc,
		      const char *name)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
	GF_VALIDATE_OR_GOTO ("call-stub", name, out);

	stub = stub_new (frame, 1, GF_FOP_REMOVEXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.removexattr.fn = fn;
	loc_copy (&stub->args.removexattr.loc, loc);
	stub->args.removexattr.name = strdup (name);
out:
	return stub;
}


call_stub_t *
fop_removexattr_cbk_stub (call_frame_t *frame,
			  fop_removexattr_cbk_t fn,
			  int32_t op_ret,
			  int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_REMOVEXATTR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.removexattr_cbk.fn = fn;
	stub->args.removexattr_cbk.op_ret = op_ret;
	stub->args.removexattr_cbk.op_errno = op_errno;
out:
	return stub;
}


call_stub_t *
fop_lk_stub (call_frame_t *frame,
	     fop_lk_t fn,
	     fd_t *fd,
	     int32_t cmd,
	     struct flock *lock)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
	
	stub = stub_new (frame, 1, GF_FOP_LK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.lk.fn = fn;
	if (fd)
		stub->args.lk.fd = fd_ref (fd);
	stub->args.lk.cmd = cmd;
	stub->args.lk.lock = *lock;
out:
	return stub;
}


call_stub_t *
fop_lk_cbk_stub (call_frame_t *frame,
		 fop_lk_cbk_t fn,
		 int32_t op_ret,
		 int32_t op_errno,
		 struct flock *lock)

{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_LK);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.lk_cbk.fn = fn;
	stub->args.lk_cbk.op_ret = op_ret;
	stub->args.lk_cbk.op_errno = op_errno;
	if (op_ret == 0)
		stub->args.lk_cbk.lock = *lock;
out:
	return stub;
}
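
/* The locking fops (inodelk/finodelk, entrylk/fentrylk) also carry the
 * lock-domain volume name alongside the usual loc/fd arguments. */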

call_stub_t *
fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
		  const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
{
  call_stub_t *stub = NULL;

  if (!frame || !lock)
    return NULL;

  stub = stub_new (frame, 1, GF_FOP_INODELK);
  if (!stub)
    return NULL;

  stub->args.inodelk.fn = fn;

  if (volume)
          stub->args.inodelk.volume = strdup (volume);

  loc_copy (&stub->args.inodelk.loc, loc);
  stub->args.inodelk.cmd  = cmd;
  stub->args.inodelk.lock = *lock;

  return stub;
}

call_stub_t *
fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
		      int32_t op_ret, int32_t op_errno)
{
  call_stub_t *stub = NULL;

  if (!frame)
    return NULL;

  stub = stub_new (frame, 0, GF_FOP_INODELK);
  if (!stub)
    return NULL;

  stub->args.inodelk_cbk.fn       = fn;
  stub->args.inodelk_cbk.op_ret   = op_ret;
  stub->args.inodelk_cbk.op_errno = op_errno;

  return stub;
}


call_stub_t *
fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
		   const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
{
	call_stub_t *stub = NULL;

	if (!frame || !lock)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_FINODELK);
	if (!stub)
		return NULL;

	stub->args.finodelk.fn = fn;

	if (fd)
		stub->args.finodelk.fd = fd_ref (fd);

	if (volume)
		stub->args.finodelk.volume = strdup (volume);

	stub->args.finodelk.cmd  = cmd;
	stub->args.finodelk.lock = *lock;

	return stub;
}


call_stub_t *
fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
		       int32_t op_ret, int32_t op_errno)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 0, GF_FOP_FINODELK);
	if (!stub)
		return NULL;

	stub->args.finodelk_cbk.fn       = fn;
	stub->args.finodelk_cbk.op_ret   = op_ret;
	stub->args.finodelk_cbk.op_errno = op_errno;

	return stub;
}


call_stub_t *
fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
		  const char *volume, loc_t *loc, const char *name,
		  entrylk_cmd cmd, entrylk_type type)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_ENTRYLK);
	if (!stub)
		return NULL;

	stub->args.entrylk.fn = fn;

	if (volume)
		stub->args.entrylk.volume = strdup (volume);

	loc_copy (&stub->args.entrylk.loc, loc);

	stub->args.entrylk.cmd = cmd;
	stub->args.entrylk.type = type;
	if (name)
		stub->args.entrylk.name = strdup (name);

	return stub;
}

call_stub_t *
fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
		      int32_t op_ret, int32_t op_errno)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 0, GF_FOP_ENTRYLK);
	if (!stub)
		return NULL;

	stub->args.entrylk_cbk.fn       = fn;
	stub->args.entrylk_cbk.op_ret   = op_ret;
	stub->args.entrylk_cbk.op_errno = op_errno;

	return stub;
}


call_stub_t *
fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
		   const char *volume, fd_t *fd, const char *name,
		   entrylk_cmd cmd, entrylk_type type)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_FENTRYLK);
	if (!stub)
		return NULL;

	stub->args.fentrylk.fn = fn;

	if (volume)
		stub->args.fentrylk.volume = strdup (volume);

	if (fd)
		stub->args.fentrylk.fd = fd_ref (fd);
	stub->args.fentrylk.cmd = cmd;
	stub->args.fentrylk.type = type;
	if (name)
		stub->args.fentrylk.name = strdup (name);

	return stub;
}

call_stub_t *
fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
		       int32_t op_ret, int32_t op_errno)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 0, GF_FOP_FENTRYLK);
	if (!stub)
		return NULL;

	stub->args.fentrylk_cbk.fn       = fn;
	stub->args.fentrylk_cbk.op_ret   = op_ret;
	stub->args.fentrylk_cbk.op_errno = op_errno;

	return stub;
}


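/* fop_setdents_stub takes ownership of the caller's entry list: the chain
 * hanging off entries->next is moved onto the stub and the caller's list is
 * truncated; the copied chain is freed in call_stub_destroy_wind (). */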
call_stub_t *
fop_setdents_stub (call_frame_t *frame,
		   fop_setdents_t fn,
		   fd_t *fd,
		   int32_t flags,
		   dir_entry_t *entries,
		   int32_t count)
{ 
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 1, GF_FOP_SETDENTS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	if (fd)
		stub->args.setdents.fd = fd_ref (fd);
	stub->args.setdents.fn = fn;
	stub->args.setdents.flags = flags;
	stub->args.setdents.count = count;
	if (entries) {
		stub->args.setdents.entries.next = entries->next;
		entries->next = NULL;
	}
out:
	return stub;
}

call_stub_t *
fop_setdents_cbk_stub (call_frame_t *frame,
		       fop_setdents_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno)
{  
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_SETDENTS);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.setdents_cbk.fn = fn;
	stub->args.setdents_cbk.op_ret = op_ret;
	stub->args.setdents_cbk.op_errno = op_errno;
out:
	return stub;
}

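/* The readdir(p) callback stubs below deep-copy every gf_dirent_t into a
 * stub-owned list, so the caller may free its own entries immediately; the
 * copy is released with gf_dirent_free () after the unwind. */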
call_stub_t *
fop_readdirp_cbk_stub (call_frame_t *frame,
		       fop_readdirp_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       gf_dirent_t *entries)
{
	call_stub_t *stub = NULL;
	gf_dirent_t *stub_entry = NULL, *entry = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_READDIRP);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readdirp_cbk.fn = fn;
	stub->args.readdirp_cbk.op_ret = op_ret;
	stub->args.readdirp_cbk.op_errno = op_errno;
	INIT_LIST_HEAD (&stub->args.readdirp_cbk.entries.list);

	/* This check must come after the list head is initialised above,
	 * so that the (empty) list is valid for list_empty(). */
	if (!entries)
		goto out;

	if (op_ret > 0) {
		list_for_each_entry (entry, &entries->list, list) {
			stub_entry = gf_dirent_for_name (entry->d_name);
			ERR_ABORT (stub_entry);
			stub_entry->d_off = entry->d_off;
			stub_entry->d_ino = entry->d_ino;
			stub_entry->d_stat = entry->d_stat;
			list_add_tail (&stub_entry->list,
				       &stub->args.readdirp_cbk.entries.list);
		}
	}
out:
	return stub;
}


call_stub_t *
fop_readdir_cbk_stub (call_frame_t *frame,
		      fop_readdir_cbk_t fn,
		      int32_t op_ret,
		      int32_t op_errno,
		      gf_dirent_t *entries)
{
	call_stub_t *stub = NULL;
	gf_dirent_t *stub_entry = NULL, *entry = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_READDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
	
	stub->args.readdir_cbk.fn = fn;
	stub->args.readdir_cbk.op_ret = op_ret;
	stub->args.readdir_cbk.op_errno = op_errno;
	INIT_LIST_HEAD (&stub->args.readdir_cbk.entries.list);

	/* This check must come after the list head is initialised above,
	 * so that the (empty) list is valid for list_empty(). */
	if (!entries)
		goto out;

	if (op_ret > 0) {
		list_for_each_entry (entry, &entries->list, list) {
			stub_entry = gf_dirent_for_name (entry->d_name);
			ERR_ABORT (stub_entry);
			stub_entry->d_off = entry->d_off;
			stub_entry->d_ino = entry->d_ino;

			list_add_tail (&stub_entry->list, 
				       &stub->args.readdir_cbk.entries.list);
		}
	}
out:
	return stub;
}

call_stub_t *
fop_readdir_stub (call_frame_t *frame,
		  fop_readdir_t fn,
		  fd_t *fd,
		  size_t size,
		  off_t off)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", fd, out);

	stub = stub_new (frame, 1, GF_FOP_READDIR);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readdir.fn = fn;
	stub->args.readdir.fd = fd_ref (fd);
	stub->args.readdir.size = size;
	stub->args.readdir.off = off;
out:
	return stub;
}

call_stub_t *
fop_readdirp_stub (call_frame_t *frame,
		   fop_readdirp_t fn,
		   fd_t *fd,
		   size_t size,
		   off_t off)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", fd, out);

	stub = stub_new (frame, 1, GF_FOP_READDIRP);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.readdirp.fn = fn;
	stub->args.readdirp.fd = fd_ref (fd);
	stub->args.readdirp.size = size;
	stub->args.readdirp.off = off;
out:
	return stub;
}

call_stub_t *
fop_checksum_stub (call_frame_t *frame,
		   fop_checksum_t fn,
		   loc_t *loc,
		   int32_t flags)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", loc, out);

	stub = stub_new (frame, 1, GF_FOP_CHECKSUM);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.checksum.fn = fn;
	loc_copy (&stub->args.checksum.loc, loc);
	stub->args.checksum.flags = flags;
out:
	return stub;
}


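/* On success the checksum callback stub duplicates both NAME_MAX-byte
 * checksum buffers; they are FREE()d again in call_resume_unwind () once
 * the reply has been handed to the callback. */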
call_stub_t *
fop_checksum_cbk_stub (call_frame_t *frame,
		       fop_checksum_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       uint8_t *file_checksum,
		       uint8_t *dir_checksum)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_CHECKSUM);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.checksum_cbk.fn = fn;
	stub->args.checksum_cbk.op_ret = op_ret;
	stub->args.checksum_cbk.op_errno = op_errno;
	if (op_ret >= 0)
	{
		stub->args.checksum_cbk.file_checksum = 
			memdup (file_checksum, NAME_MAX);

		stub->args.checksum_cbk.dir_checksum = 
			memdup (dir_checksum, NAME_MAX);
	}
out:
	return stub;
}


call_stub_t *
fop_rchecksum_stub (call_frame_t *frame,
                    fop_rchecksum_t fn,
                    fd_t *fd, off_t offset,
                    int32_t len)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	GF_VALIDATE_OR_GOTO ("call-stub", fd, out);

	stub = stub_new (frame, 1, GF_FOP_RCHECKSUM);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.rchecksum.fn = fn;
        stub->args.rchecksum.fd = fd_ref (fd);
	stub->args.rchecksum.offset = offset;
	stub->args.rchecksum.len    = len;
out:
	return stub;
}


call_stub_t *
fop_rchecksum_cbk_stub (call_frame_t *frame,
                        fop_rchecksum_cbk_t fn,
                        int32_t op_ret,
                        int32_t op_errno,
                        uint32_t weak_checksum,
                        uint8_t *strong_checksum)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_RCHECKSUM);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.rchecksum_cbk.fn = fn;
	stub->args.rchecksum_cbk.op_ret = op_ret;
	stub->args.rchecksum_cbk.op_errno = op_errno;

	if (op_ret >= 0)
	{
		stub->args.rchecksum_cbk.weak_checksum =
                        weak_checksum;

		stub->args.rchecksum_cbk.strong_checksum = 
			memdup (strong_checksum, MD5_DIGEST_LEN);
	}
out:
	return stub;
}


call_stub_t *
fop_xattrop_cbk_stub (call_frame_t *frame,
		      fop_xattrop_cbk_t fn,
		      int32_t op_ret,
		      int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	
	stub = stub_new (frame, 0, GF_FOP_XATTROP);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.xattrop_cbk.fn       = fn;
	stub->args.xattrop_cbk.op_ret   = op_ret;
	stub->args.xattrop_cbk.op_errno = op_errno;

out:
	return stub;
}


call_stub_t *
fop_fxattrop_cbk_stub (call_frame_t *frame,
		       fop_fxattrop_cbk_t fn,
		       int32_t op_ret,
		       int32_t op_errno,
		       dict_t *xattr)
{
	call_stub_t *stub = NULL;
	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);

	stub = stub_new (frame, 0, GF_FOP_FXATTROP);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.fxattrop_cbk.fn = fn;
	stub->args.fxattrop_cbk.op_ret = op_ret;
	stub->args.fxattrop_cbk.op_errno = op_errno;
	if (xattr) 
		stub->args.fxattrop_cbk.xattr = dict_ref (xattr);

out:
	return stub;
}


call_stub_t *
fop_xattrop_stub (call_frame_t *frame,
		  fop_xattrop_t fn,
		  loc_t *loc,
		  gf_xattrop_flags_t optype,
		  dict_t *xattr)
{
	call_stub_t *stub = NULL;

	if (!frame || !xattr)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_XATTROP);
	if (!stub)
		return NULL;

	stub->args.xattrop.fn = fn;
	
	loc_copy (&stub->args.xattrop.loc, loc);

	stub->args.xattrop.optype = optype;
	stub->args.xattrop.xattr = dict_ref (xattr);

	return stub;
}

call_stub_t *
fop_fxattrop_stub (call_frame_t *frame,
		   fop_fxattrop_t fn,
		   fd_t *fd,
		   gf_xattrop_flags_t optype,
		   dict_t *xattr)
{
	call_stub_t *stub = NULL;

	if (!frame || !xattr)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_FXATTROP);
	if (!stub)
		return NULL;

	stub->args.fxattrop.fn = fn;
	
	if (fd)
		stub->args.fxattrop.fd = fd_ref (fd);

	stub->args.fxattrop.optype = optype;
	stub->args.fxattrop.xattr = dict_ref (xattr);

	return stub;
}


call_stub_t *
fop_lock_notify_cbk_stub (call_frame_t *frame, fop_lock_notify_cbk_t fn,
                          int32_t op_ret, int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	
	stub = stub_new (frame, 0, GF_FOP_LOCK_NOTIFY);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.lock_notify_cbk.fn       = fn;
	stub->args.lock_notify_cbk.op_ret   = op_ret;
	stub->args.lock_notify_cbk.op_errno = op_errno;

out:
	return stub;
}


call_stub_t *
fop_lock_notify_stub (call_frame_t *frame, fop_lock_notify_t fn,
		      loc_t *loc, int32_t timeout)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_LOCK_NOTIFY);
	if (!stub)
		return NULL;

	stub->args.lock_notify.fn = fn;
	
	loc_copy (&stub->args.lock_notify.loc, loc);

	stub->args.lock_notify.timeout = timeout;

	return stub;
}


call_stub_t *
fop_lock_fnotify_cbk_stub (call_frame_t *frame, fop_lock_fnotify_cbk_t fn,
                           int32_t op_ret, int32_t op_errno)
{
	call_stub_t *stub = NULL;

	GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
	
	stub = stub_new (frame, 0, GF_FOP_LOCK_FNOTIFY);
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	stub->args.lock_fnotify_cbk.fn       = fn;
	stub->args.lock_fnotify_cbk.op_ret   = op_ret;
	stub->args.lock_fnotify_cbk.op_errno = op_errno;

out:
	return stub;
}


call_stub_t *
fop_lock_fnotify_stub (call_frame_t *frame, fop_lock_fnotify_t fn,
		       fd_t *fd, int32_t timeout)
{
	call_stub_t *stub = NULL;

	if (!frame)
		return NULL;

	stub = stub_new (frame, 1, GF_FOP_LOCK_FNOTIFY);
	if (!stub)
		return NULL;

	stub->args.lock_fnotify.fn = fn;

	if (fd)
		stub->args.lock_fnotify.fd = fd_ref (fd);
	stub->args.lock_fnotify.timeout = timeout;

	return stub;
}

call_stub_t *
fop_setattr_cbk_stub (call_frame_t *frame,
                      fop_setattr_cbk_t fn,
                      int32_t op_ret,
                      int32_t op_errno,
                      struct stat *statpre,
                      struct stat *statpost)
{
        call_stub_t *stub = NULL;

        if (frame == NULL)
                goto out;

	stub = stub_new (frame, 0, GF_FOP_SETATTR);
	if (stub == NULL)
                goto out;

	stub->args.setattr_cbk.fn = fn;

        stub->args.setattr_cbk.op_ret = op_ret;
        stub->args.setattr_cbk.op_errno = op_errno;

        if (statpre)
                stub->args.setattr_cbk.statpre = *statpre;
        if (statpost)
                stub->args.setattr_cbk.statpost = *statpost;

out:
	return stub;
}

call_stub_t *
fop_fsetattr_cbk_stub (call_frame_t *frame,
                       fop_setattr_cbk_t fn,
                       int32_t op_ret,
                       int32_t op_errno,
                       struct stat *statpre,
                       struct stat *statpost)
{
        call_stub_t *stub = NULL;

        if (frame == NULL)
                goto out;

	stub = stub_new (frame, 0, GF_FOP_FSETATTR);
	if (stub == NULL)
                goto out;

	stub->args.fsetattr_cbk.fn = fn;

        stub->args.fsetattr_cbk.op_ret = op_ret;
        stub->args.fsetattr_cbk.op_errno = op_errno;

        if (statpre)
                stub->args.fsetattr_cbk.statpre = *statpre;
        if (statpost)
                stub->args.fsetattr_cbk.statpost = *statpost;
out:
	return stub;
}

call_stub_t *
fop_setattr_stub (call_frame_t *frame,
                  fop_setattr_t fn,
                  loc_t *loc,
                  struct stat *stbuf,
                  int32_t valid)
{
        call_stub_t *stub = NULL;

        if (frame == NULL)
                goto out;

        if (fn == NULL)
                goto out;

	stub = stub_new (frame, 1, GF_FOP_SETATTR);
	if (stub == NULL)
                goto out;

	stub->args.setattr.fn = fn;

	loc_copy (&stub->args.setattr.loc, loc);

        if (stbuf)
                stub->args.setattr.stbuf = *stbuf;

        stub->args.setattr.valid = valid;

out:
	return stub;
}

call_stub_t *
fop_fsetattr_stub (call_frame_t *frame,
                   fop_fsetattr_t fn,
                   fd_t *fd,
                   struct stat *stbuf,
                   int32_t valid)
{
        call_stub_t *stub = NULL;

        if (frame == NULL)
                goto out;

        if (fn == NULL)
                goto out;

	stub = stub_new (frame, 1, GF_FOP_FSETATTR);
	if (stub == NULL)
                goto out;

	stub->args.fsetattr.fn = fn;

        if (fd)
                stub->args.fsetattr.fd = fd_ref (fd);

        if (stbuf)
                stub->args.fsetattr.stbuf = *stbuf;

        stub->args.fsetattr.valid = valid;

out:
	return stub;
}

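/* Resume a paused call on its wind (request) path: re-issue the stored fop
 * on stub->frame->this with the arguments captured when the stub was
 * created. */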
static void
call_resume_wind (call_stub_t *stub)
{
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	switch (stub->fop) {
	case GF_FOP_OPEN:
	{
		stub->args.open.fn (stub->frame, 
				    stub->frame->this,
				    &stub->args.open.loc, 
				    stub->args.open.flags, stub->args.open.fd,
                                    stub->args.open.wbflags);
		break;
	}
	case GF_FOP_CREATE:
	{
		stub->args.create.fn (stub->frame,
				      stub->frame->this,
				      &stub->args.create.loc,
				      stub->args.create.flags,
				      stub->args.create.mode,
				      stub->args.create.fd);
		break;
	}
	case GF_FOP_STAT:
	{
		stub->args.stat.fn (stub->frame,
				    stub->frame->this,
				    &stub->args.stat.loc);
		break;
	}
	case GF_FOP_READLINK:
	{
		stub->args.readlink.fn (stub->frame,
					stub->frame->this,
					&stub->args.readlink.loc,
					stub->args.readlink.size);
		break;
	}
  
	case GF_FOP_MKNOD:
	{
		stub->args.mknod.fn (stub->frame,
				     stub->frame->this,
				     &stub->args.mknod.loc,
				     stub->args.mknod.mode,
				     stub->args.mknod.rdev);
	}
	break;
  
	case GF_FOP_MKDIR:
	{
		stub->args.mkdir.fn (stub->frame,
				     stub->frame->this,
				     &stub->args.mkdir.loc,
				     stub->args.mkdir.mode);
	}
	break;
  
	case GF_FOP_UNLINK:
	{
		stub->args.unlink.fn (stub->frame,
				      stub->frame->this,
				      &stub->args.unlink.loc);
	}
	break;

	case GF_FOP_RMDIR:
	{
		stub->args.rmdir.fn (stub->frame,
				     stub->frame->this,
				     &stub->args.rmdir.loc);
	}
	break;
      
	case GF_FOP_SYMLINK:
	{
		stub->args.symlink.fn (stub->frame,
				       stub->frame->this,
				       stub->args.symlink.linkname,
				       &stub->args.symlink.loc);
	}
	break;
  
	case GF_FOP_RENAME:
	{
		stub->args.rename.fn (stub->frame,
				      stub->frame->this,
				      &stub->args.rename.old,
				      &stub->args.rename.new);
	}
	break;

	case GF_FOP_LINK:
	{
		stub->args.link.fn (stub->frame,
				    stub->frame->this,
				    &stub->args.link.oldloc,
				    &stub->args.link.newloc);
	}
	break;
  
	case GF_FOP_TRUNCATE:
	{
		stub->args.truncate.fn (stub->frame,
					stub->frame->this,
					&stub->args.truncate.loc,
					stub->args.truncate.off);
		break;
	}
      
	case GF_FOP_READ:
	{
		stub->args.readv.fn (stub->frame,
				     stub->frame->this,
				     stub->args.readv.fd,
				     stub->args.readv.size,
				     stub->args.readv.off);
		break;
	}
  
	case GF_FOP_WRITE:
	{
		stub->args.writev.fn (stub->frame,
				      stub->frame->this,
				      stub->args.writev.fd,
				      stub->args.writev.vector,
				      stub->args.writev.count,
				      stub->args.writev.off,
                                      stub->args.writev.iobref);
		break;
	}
  
	case GF_FOP_STATFS:
	{
		stub->args.statfs.fn (stub->frame,
				      stub->frame->this,
				      &stub->args.statfs.loc);
		break;
	}
	case GF_FOP_FLUSH:
	{
		stub->args.flush.fn (stub->frame,
				     stub->frame->this,
				     stub->args.flush.fd);
		break;
	}
  
	case GF_FOP_FSYNC:
	{
		stub->args.fsync.fn (stub->frame,
				     stub->frame->this,
				     stub->args.fsync.fd,
				     stub->args.fsync.datasync);
		break;
	}

	case GF_FOP_SETXATTR:
	{
		stub->args.setxattr.fn (stub->frame,
					stub->frame->this,
					&stub->args.setxattr.loc,
					stub->args.setxattr.dict,
					stub->args.setxattr.flags);
		break;
	}
  
	case GF_FOP_GETXATTR:
	{
		stub->args.getxattr.fn (stub->frame,
					stub->frame->this,
					&stub->args.getxattr.loc,
					stub->args.getxattr.name);
		break;
	}

	case GF_FOP_FSETXATTR:
	{
		stub->args.fsetxattr.fn (stub->frame,
                                         stub->frame->this,
                                         stub->args.fsetxattr.fd,
                                         stub->args.fsetxattr.dict,
                                         stub->args.fsetxattr.flags);
		break;
	}

	case GF_FOP_FGETXATTR:
	{
		stub->args.fgetxattr.fn (stub->frame,
                                         stub->frame->this,
                                         stub->args.fgetxattr.fd,
                                         stub->args.fgetxattr.name);
		break;
	}

	case GF_FOP_REMOVEXATTR:
	{
		stub->args.removexattr.fn (stub->frame,
					   stub->frame->this,
					   &stub->args.removexattr.loc,
					   stub->args.removexattr.name);
		break;
	}
  
	case GF_FOP_OPENDIR:
	{
		stub->args.opendir.fn (stub->frame,
				       stub->frame->this,
				       &stub->args.opendir.loc,
				       stub->args.opendir.fd);
		break;
	}

	case GF_FOP_GETDENTS:
	{
		stub->args.getdents.fn (stub->frame,
					stub->frame->this,
					stub->args.getdents.fd,
					stub->args.getdents.size,
					stub->args.getdents.off,
					stub->args.getdents.flag);
		break;
	}

	case GF_FOP_FSYNCDIR:
	{
		stub->args.fsyncdir.fn (stub->frame,
					stub->frame->this,
					stub->args.fsyncdir.fd,
					stub->args.fsyncdir.datasync);
		break;
	}
  
	case GF_FOP_ACCESS:
	{
		stub->args.access.fn (stub->frame,
				      stub->frame->this,
				      &stub->args.access.loc,
				      stub->args.access.mask);
		break;
	}
  
	case GF_FOP_FTRUNCATE:
	{
		stub->args.ftruncate.fn (stub->frame,
					 stub->frame->this,
					 stub->args.ftruncate.fd,
					 stub->args.ftruncate.off);
		break;
	}
  
	case GF_FOP_FSTAT:
	{
		stub->args.fstat.fn (stub->frame,
				     stub->frame->this,
				     stub->args.fstat.fd);
		break;
	}
  
	case GF_FOP_LK:
	{
		stub->args.lk.fn (stub->frame,
				  stub->frame->this,
				  stub->args.lk.fd,
				  stub->args.lk.cmd,
				  &stub->args.lk.lock);
		break;
	}

	case GF_FOP_INODELK:
	{
		stub->args.inodelk.fn (stub->frame,
				       stub->frame->this,
                                       stub->args.inodelk.volume,
				       &stub->args.inodelk.loc,
				       stub->args.inodelk.cmd,
				       &stub->args.inodelk.lock);
		break;
	}

	case GF_FOP_FINODELK:
	{
		stub->args.finodelk.fn (stub->frame,
					stub->frame->this,
                                        stub->args.finodelk.volume,
					stub->args.finodelk.fd,
					stub->args.finodelk.cmd,
					&stub->args.finodelk.lock);
		break;
	}

	case GF_FOP_ENTRYLK:
	{
		stub->args.entrylk.fn (stub->frame,
				       stub->frame->this,
                                       stub->args.entrylk.volume,
				       &stub->args.entrylk.loc,
				       stub->args.entrylk.name,
				       stub->args.entrylk.cmd,
				       stub->args.entrylk.type);
		break;
	}

	case GF_FOP_FENTRYLK:
	{
		stub->args.fentrylk.fn (stub->frame,
					stub->frame->this,
                                        stub->args.fentrylk.volume,
					stub->args.fentrylk.fd,
					stub->args.fentrylk.name,
					stub->args.fentrylk.cmd,
					stub->args.fentrylk.type);
		break;
	}
  
  
	case GF_FOP_LOOKUP:
	{
		stub->args.lookup.fn (stub->frame, 
				      stub->frame->this,
				      &stub->args.lookup.loc,
				      stub->args.lookup.xattr_req);
		break;
	}

	case GF_FOP_SETDENTS:
	{
		stub->args.setdents.fn (stub->frame,
					stub->frame->this,
					stub->args.setdents.fd,
					stub->args.setdents.flags,
					&stub->args.setdents.entries,
					stub->args.setdents.count);
		break;
	}

	case GF_FOP_CHECKSUM:
	{
		stub->args.checksum.fn (stub->frame,
					stub->frame->this,
					&stub->args.checksum.loc,
					stub->args.checksum.flags);
		break;
	}

	case GF_FOP_RCHECKSUM:
	{
		stub->args.rchecksum.fn (stub->frame,
                                         stub->frame->this,
                                         stub->args.rchecksum.fd,
                                         stub->args.rchecksum.offset,
                                         stub->args.rchecksum.len);
		break;
	}

	case GF_FOP_READDIR:
	{
		stub->args.readdir.fn (stub->frame,
				       stub->frame->this,
				       stub->args.readdir.fd,
				       stub->args.readdir.size,
				       stub->args.readdir.off);
		break;
	}

        case GF_FOP_READDIRP:
	{
		stub->args.readdirp.fn (stub->frame,
				        stub->frame->this,
				        stub->args.readdirp.fd,
				        stub->args.readdirp.size,
				        stub->args.readdirp.off);
		break;
	}

	case GF_FOP_XATTROP:
	{
		stub->args.xattrop.fn (stub->frame,
				       stub->frame->this,
				       &stub->args.xattrop.loc,
				       stub->args.xattrop.optype,
				       stub->args.xattrop.xattr);

		break;
	}
	case GF_FOP_FXATTROP:
	{
		stub->args.fxattrop.fn (stub->frame,
					stub->frame->this,
					stub->args.fxattrop.fd,
					stub->args.fxattrop.optype,
					stub->args.fxattrop.xattr);

		break;
	}
	case GF_FOP_LOCK_NOTIFY:
	{
		stub->args.lock_notify.fn (stub->frame,
					   stub->frame->this,
					   &stub->args.lock_notify.loc,
					   stub->args.lock_notify.timeout);
		break;
	}
	case GF_FOP_LOCK_FNOTIFY:
	{
		stub->args.lock_fnotify.fn (stub->frame,
					    stub->frame->this,
					    stub->args.lock_fnotify.fd,
					    stub->args.lock_fnotify.timeout);
		break;
	}
        case GF_FOP_SETATTR:
        {
                stub->args.setattr.fn (stub->frame,
                                       stub->frame->this,
                                       &stub->args.setattr.loc,
                                       &stub->args.setattr.stbuf,
                                       stub->args.setattr.valid);
                break;
        }
        case GF_FOP_FSETATTR:
        {
                stub->args.fsetattr.fn (stub->frame,
                                        stub->frame->this,
                                        stub->args.fsetattr.fd,
                                        &stub->args.fsetattr.stbuf,
                                        stub->args.fsetattr.valid);
                break;
        }
	default:
	{
		gf_log ("call-stub",
			GF_LOG_DEBUG,
			"Invalid value of FOP");
	}
	break;
	}
out:
	return;
}



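/* Resume a paused call on its unwind (reply) path: hand the stored return
 * values to the saved callback, or STACK_UNWIND the frame directly when no
 * callback was registered.  Several cases also release dicts, inodes,
 * dirent lists and checksum buffers once the reply has been delivered. */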
static void
call_resume_unwind (call_stub_t *stub)
{
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	switch (stub->fop) {
	case GF_FOP_OPEN:
	{
		if (!stub->args.open_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.open_cbk.op_ret,
				      stub->args.open_cbk.op_errno,
				      stub->args.open_cbk.fd);
		else
			stub->args.open_cbk.fn (stub->frame, 
						stub->frame->cookie,
						stub->frame->this,
						stub->args.open_cbk.op_ret, 
						stub->args.open_cbk.op_errno,
						stub->args.open_cbk.fd);
		break;
	}

	case GF_FOP_CREATE:
	{
		if (!stub->args.create_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.create_cbk.op_ret,
				      stub->args.create_cbk.op_errno,
				      stub->args.create_cbk.fd,
				      stub->args.create_cbk.inode,
				      &stub->args.create_cbk.buf,
                                      &stub->args.create_cbk.preparent,
                                      &stub->args.create_cbk.postparent);
		else
			stub->args.create_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.create_cbk.op_ret,
						  stub->args.create_cbk.op_errno,
						  stub->args.create_cbk.fd,
						  stub->args.create_cbk.inode,
						  &stub->args.create_cbk.buf,
                                                  &stub->args.create_cbk.preparent,
                                                  &stub->args.create_cbk.postparent);
      
		break;
	}

	case GF_FOP_STAT:
	{
		if (!stub->args.stat_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.stat_cbk.op_ret,
				      stub->args.stat_cbk.op_errno,
				      &stub->args.stat_cbk.buf);
		else
			stub->args.stat_cbk.fn (stub->frame,
						stub->frame->cookie,
						stub->frame->this,
						stub->args.stat_cbk.op_ret,
						stub->args.stat_cbk.op_errno,
						&stub->args.stat_cbk.buf);

		break;
	}

	case GF_FOP_READLINK:
	{
		if (!stub->args.readlink_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.readlink_cbk.op_ret,
				      stub->args.readlink_cbk.op_errno,
				      stub->args.readlink_cbk.buf,
                                      &stub->args.readlink_cbk.sbuf);
		else
			stub->args.readlink_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.readlink_cbk.op_ret,
						    stub->args.readlink_cbk.op_errno,
						    stub->args.readlink_cbk.buf,
                                                    &stub->args.readlink_cbk.sbuf);

		break;
	}
  
	case GF_FOP_MKNOD:
	{
		if (!stub->args.mknod_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.mknod_cbk.op_ret,
				      stub->args.mknod_cbk.op_errno,
				      stub->args.mknod_cbk.inode,
                                      &stub->args.mknod_cbk.buf,
                                      &stub->args.mknod_cbk.preparent,
                                      &stub->args.mknod_cbk.postparent);
		else
			stub->args.mknod_cbk.fn (stub->frame,
						 stub->frame->cookie,
						 stub->frame->this,
						 stub->args.mknod_cbk.op_ret,
						 stub->args.mknod_cbk.op_errno,
						 stub->args.mknod_cbk.inode,
                                                 &stub->args.mknod_cbk.buf,
                                                 &stub->args.mknod_cbk.preparent,
                                                 &stub->args.mknod_cbk.postparent);
		break;
	}

	case GF_FOP_MKDIR:
	{
		if (!stub->args.mkdir_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.mkdir_cbk.op_ret,
				      stub->args.mkdir_cbk.op_errno,
				      stub->args.mkdir_cbk.inode,
                                      &stub->args.mkdir_cbk.buf,
                                      &stub->args.mkdir_cbk.preparent,
                                      &stub->args.mkdir_cbk.postparent);
		else
			stub->args.mkdir_cbk.fn (stub->frame,
						 stub->frame->cookie,
						 stub->frame->this,
						 stub->args.mkdir_cbk.op_ret,
						 stub->args.mkdir_cbk.op_errno,
						 stub->args.mkdir_cbk.inode,
                                                 &stub->args.mkdir_cbk.buf,
                                                 &stub->args.mkdir_cbk.preparent,
                                                 &stub->args.mkdir_cbk.postparent);

		if (stub->args.mkdir_cbk.inode)
			inode_unref (stub->args.mkdir_cbk.inode);

		break;
	}
  
	case GF_FOP_UNLINK:
	{
		if (!stub->args.unlink_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.unlink_cbk.op_ret,
				      stub->args.unlink_cbk.op_errno,
                                      &stub->args.unlink_cbk.preparent,
                                      &stub->args.unlink_cbk.postparent);
		else
			stub->args.unlink_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.unlink_cbk.op_ret,
						  stub->args.unlink_cbk.op_errno,
                                                  &stub->args.unlink_cbk.preparent,
                                                  &stub->args.unlink_cbk.postparent);
		break;
	}
  
	case GF_FOP_RMDIR:
	{
		if (!stub->args.rmdir_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.rmdir_cbk.op_ret,
				      stub->args.rmdir_cbk.op_errno,
                                      &stub->args.rmdir_cbk.preparent,
                                      &stub->args.rmdir_cbk.postparent);
		else
			stub->args.rmdir_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.rmdir_cbk.op_ret,
						  stub->args.rmdir_cbk.op_errno,
                                                  &stub->args.rmdir_cbk.preparent,
                                                  &stub->args.rmdir_cbk.postparent);
		break;
	}

	case GF_FOP_SYMLINK:
	{
		if (!stub->args.symlink_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.symlink_cbk.op_ret,
				      stub->args.symlink_cbk.op_errno,
				      stub->args.symlink_cbk.inode,
                                      &stub->args.symlink_cbk.buf,
                                      &stub->args.symlink_cbk.preparent,
                                      &stub->args.symlink_cbk.postparent);
		else
			stub->args.symlink_cbk.fn (stub->frame,
						   stub->frame->cookie,
						   stub->frame->this,
						   stub->args.symlink_cbk.op_ret,
						   stub->args.symlink_cbk.op_errno,
						   stub->args.symlink_cbk.inode,
                                                   &stub->args.symlink_cbk.buf,
                                                   &stub->args.symlink_cbk.preparent,
                                                   &stub->args.symlink_cbk.postparent);
	}
	break;
  
	case GF_FOP_RENAME:
	{
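		/* NOTE: the rename unwind below is compiled out (#if 0), so
		 * resuming a GF_FOP_RENAME unwind currently returns without
		 * invoking the stored callback or unwinding the frame. */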
#if 0
		if (!stub->args.rename_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.rename_cbk.op_ret,
				      stub->args.rename_cbk.op_errno,
				      &stub->args.rename_cbk.buf,
                                      &stub->args.rename_cbk.preoldparent,
                                      &stub->args.rename_cbk.postoldparent,
                                      &stub->args.rename_cbk.prenewparent,
                                      &stub->args.rename_cbk.postnewparent);
		else
			stub->args.rename_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.rename_cbk.op_ret,
						  stub->args.rename_cbk.op_errno,
						  &stub->args.rename_cbk.buf,
                                                  &stub->args.rename_cbk.preoldparent,
                                                  &stub->args.rename_cbk.postoldparent,
                                                  &stub->args.rename_cbk.prenewparent,
                                                  &stub->args.rename_cbk.postnewparent);
#endif
		break;
	}
  
	case GF_FOP_LINK:
	{
		if (!stub->args.link_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.link_cbk.op_ret,
				      stub->args.link_cbk.op_errno,
				      stub->args.link_cbk.inode,
				      &stub->args.link_cbk.buf,
				      &stub->args.link_cbk.preparent,
				      &stub->args.link_cbk.postparent);
		else
			stub->args.link_cbk.fn (stub->frame,
						stub->frame->cookie,
						stub->frame->this,
						stub->args.link_cbk.op_ret,
						stub->args.link_cbk.op_errno,
						stub->args.link_cbk.inode,
                                                &stub->args.link_cbk.buf,
                                                &stub->args.link_cbk.preparent,
                                                &stub->args.link_cbk.postparent);
		break;
	}
  
	case GF_FOP_TRUNCATE:
	{
		if (!stub->args.truncate_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.truncate_cbk.op_ret,
				      stub->args.truncate_cbk.op_errno,
				      &stub->args.truncate_cbk.prebuf,
                                      &stub->args.truncate_cbk.postbuf);
		else
			stub->args.truncate_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.truncate_cbk.op_ret,
						    stub->args.truncate_cbk.op_errno,
						    &stub->args.truncate_cbk.prebuf,
                                                    &stub->args.truncate_cbk.postbuf);
		break;
	}
      
	case GF_FOP_READ:
	{
		if (!stub->args.readv_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.readv_cbk.op_ret,
				      stub->args.readv_cbk.op_errno,
				      stub->args.readv_cbk.vector,
				      stub->args.readv_cbk.count,
				      &stub->args.readv_cbk.stbuf,
                                      stub->args.readv_cbk.iobref);
		else
			stub->args.readv_cbk.fn (stub->frame,
						 stub->frame->cookie,
						 stub->frame->this,
						 stub->args.readv_cbk.op_ret,
						 stub->args.readv_cbk.op_errno,
						 stub->args.readv_cbk.vector,
						 stub->args.readv_cbk.count,
						 &stub->args.readv_cbk.stbuf,
                                                 stub->args.readv_cbk.iobref);
	}
	break;
  
	case GF_FOP_WRITE:
	{
		if (!stub->args.writev_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.writev_cbk.op_ret,
				      stub->args.writev_cbk.op_errno,
                                      &stub->args.writev_cbk.prebuf,
				      &stub->args.writev_cbk.postbuf);
		else
			stub->args.writev_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.writev_cbk.op_ret,
						  stub->args.writev_cbk.op_errno,
                                                  &stub->args.writev_cbk.prebuf,
						  &stub->args.writev_cbk.postbuf);
		break;
	}
  
	case GF_FOP_STATFS:
	{
		if (!stub->args.statfs_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.statfs_cbk.op_ret,
				      stub->args.statfs_cbk.op_errno,
				      &(stub->args.statfs_cbk.buf));
		else
			stub->args.statfs_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.statfs_cbk.op_ret,
						  stub->args.statfs_cbk.op_errno,
						  &(stub->args.statfs_cbk.buf));
	}
	break;
  
	case GF_FOP_FLUSH:
	{
		if (!stub->args.flush_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.flush_cbk.op_ret,
				      stub->args.flush_cbk.op_errno);
		else
			stub->args.flush_cbk.fn (stub->frame,
						 stub->frame->cookie,
						 stub->frame->this,
						 stub->args.flush_cbk.op_ret,
						 stub->args.flush_cbk.op_errno);
      
		break;
	}
  
	case GF_FOP_FSYNC:
	{
		if (!stub->args.fsync_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fsync_cbk.op_ret,
				      stub->args.fsync_cbk.op_errno,
                                      &stub->args.fsync_cbk.prebuf,
                                      &stub->args.fsync_cbk.postbuf);
		else
			stub->args.fsync_cbk.fn (stub->frame,
						 stub->frame->cookie,
						 stub->frame->this,
						 stub->args.fsync_cbk.op_ret,
						 stub->args.fsync_cbk.op_errno,
                                                 &stub->args.fsync_cbk.prebuf,
                                                 &stub->args.fsync_cbk.postbuf);
		break;
	}
  
	case GF_FOP_SETXATTR:
	{
		if (!stub->args.setxattr_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.setxattr_cbk.op_ret,
				      stub->args.setxattr_cbk.op_errno);

		else
			stub->args.setxattr_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.setxattr_cbk.op_ret,
						    stub->args.setxattr_cbk.op_errno);

		break;
	}
  
	case GF_FOP_GETXATTR:
	{
		if (!stub->args.getxattr_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.getxattr_cbk.op_ret,
				      stub->args.getxattr_cbk.op_errno,
				      stub->args.getxattr_cbk.dict);
		else
			stub->args.getxattr_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.getxattr_cbk.op_ret,
						    stub->args.getxattr_cbk.op_errno,
						    stub->args.getxattr_cbk.dict);
		break;
	}

	case GF_FOP_FSETXATTR:
	{
		if (!stub->args.fsetxattr_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fsetxattr_cbk.op_ret,
				      stub->args.fsetxattr_cbk.op_errno);

		else
			stub->args.fsetxattr_cbk.fn (stub->frame,
                                                     stub->frame->cookie,
                                                     stub->frame->this,
                                                     stub->args.fsetxattr_cbk.op_ret,
                                                     stub->args.fsetxattr_cbk.op_errno);

		break;
	}

	case GF_FOP_FGETXATTR:
	{
		if (!stub->args.fgetxattr_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fgetxattr_cbk.op_ret,
				      stub->args.fgetxattr_cbk.op_errno,
				      stub->args.fgetxattr_cbk.dict);
		else
			stub->args.fgetxattr_cbk.fn (stub->frame,
                                                     stub->frame->cookie,
                                                     stub->frame->this,
                                                     stub->args.fgetxattr_cbk.op_ret,
                                                     stub->args.fgetxattr_cbk.op_errno,
                                                     stub->args.fgetxattr_cbk.dict);
		break;
	}

	case GF_FOP_REMOVEXATTR:
	{
		if (!stub->args.removexattr_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.removexattr_cbk.op_ret,
				      stub->args.removexattr_cbk.op_errno);
		else
			stub->args.removexattr_cbk.fn (stub->frame,
						       stub->frame->cookie,
						       stub->frame->this,
						       stub->args.removexattr_cbk.op_ret,
						       stub->args.removexattr_cbk.op_errno);

		break;
	}
  
	case GF_FOP_OPENDIR:
	{
		if (!stub->args.opendir_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.opendir_cbk.op_ret,
				      stub->args.opendir_cbk.op_errno,
				      stub->args.opendir_cbk.fd);
		else
			stub->args.opendir_cbk.fn (stub->frame,
						   stub->frame->cookie,
						   stub->frame->this,
						   stub->args.opendir_cbk.op_ret,
						   stub->args.opendir_cbk.op_errno,
						   stub->args.opendir_cbk.fd);
		break;
	}
  
	case GF_FOP_GETDENTS:
	{
		if (!stub->args.getdents_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.getdents_cbk.op_ret,
				      stub->args.getdents_cbk.op_errno,
				      &stub->args.getdents_cbk.entries,
				      stub->args.getdents_cbk.count);
		else
			stub->args.getdents_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.getdents_cbk.op_ret,
						    stub->args.getdents_cbk.op_errno,
						    &stub->args.getdents_cbk.entries,
						    stub->args.getdents_cbk.count);
		break;
	}
  
	case GF_FOP_FSYNCDIR:
	{
		if (!stub->args.fsyncdir_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fsyncdir_cbk.op_ret,
				      stub->args.fsyncdir_cbk.op_errno);
		else
			stub->args.fsyncdir_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.fsyncdir_cbk.op_ret,
						    stub->args.fsyncdir_cbk.op_errno);
		break;
	}
  
	case GF_FOP_ACCESS:
	{
		if (!stub->args.access_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.access_cbk.op_ret,
				      stub->args.access_cbk.op_errno);
		else
			stub->args.access_cbk.fn (stub->frame,
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.access_cbk.op_ret,
						  stub->args.access_cbk.op_errno);

		break;
	}
  
	case GF_FOP_FTRUNCATE:
	{
		if (!stub->args.ftruncate_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.ftruncate_cbk.op_ret,
				      stub->args.ftruncate_cbk.op_errno,
				      &stub->args.ftruncate_cbk.prebuf,
				      &stub->args.ftruncate_cbk.postbuf);
		else
			stub->args.ftruncate_cbk.fn (stub->frame,
						     stub->frame->cookie,
						     stub->frame->this,
						     stub->args.ftruncate_cbk.op_ret,
						     stub->args.ftruncate_cbk.op_errno,
						     &stub->args.ftruncate_cbk.prebuf,
						     &stub->args.ftruncate_cbk.postbuf);
		break;
	}
  
	case GF_FOP_FSTAT:
	{
		if (!stub->args.fstat_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fstat_cbk.op_ret,
				      stub->args.fstat_cbk.op_errno,
				      &stub->args.fstat_cbk.buf);
		else
			stub->args.fstat_cbk.fn (stub->frame,
						 stub->frame->cookie,
						 stub->frame->this,
						 stub->args.fstat_cbk.op_ret,
						 stub->args.fstat_cbk.op_errno,
						 &stub->args.fstat_cbk.buf);
      
		break;
	}
  
	case GF_FOP_LK:
	{
		if (!stub->args.lk_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.lk_cbk.op_ret,
				      stub->args.lk_cbk.op_errno,
				      &stub->args.lk_cbk.lock);
		else
			stub->args.lk_cbk.fn (stub->frame,
					      stub->frame->cookie,
					      stub->frame->this,
					      stub->args.lk_cbk.op_ret,
					      stub->args.lk_cbk.op_errno,
					      &stub->args.lk_cbk.lock);
		break;
	}

	case GF_FOP_INODELK:
	{
		if (!stub->args.inodelk_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.inodelk_cbk.op_ret,
				      stub->args.inodelk_cbk.op_errno);

		else
			stub->args.inodelk_cbk.fn (stub->frame,
						   stub->frame->cookie,
						   stub->frame->this,
						   stub->args.inodelk_cbk.op_ret,
						   stub->args.inodelk_cbk.op_errno);
		break;
	}

	case GF_FOP_FINODELK:
	{
		if (!stub->args.finodelk_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.finodelk_cbk.op_ret,
				      stub->args.finodelk_cbk.op_errno);

		else
			stub->args.finodelk_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.finodelk_cbk.op_ret,
						    stub->args.finodelk_cbk.op_errno);
		break;
	}

	case GF_FOP_ENTRYLK:
	{
		if (!stub->args.entrylk_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.entrylk_cbk.op_ret,
				      stub->args.entrylk_cbk.op_errno);

		else
			stub->args.entrylk_cbk.fn (stub->frame,
						   stub->frame->cookie,
						   stub->frame->this,
						   stub->args.entrylk_cbk.op_ret,
						   stub->args.entrylk_cbk.op_errno);
		break;
	}

	case GF_FOP_FENTRYLK:
	{
		if (!stub->args.fentrylk_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fentrylk_cbk.op_ret,
				      stub->args.fentrylk_cbk.op_errno);

		else
			stub->args.fentrylk_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.fentrylk_cbk.op_ret,
						    stub->args.fentrylk_cbk.op_errno);
		break;
	}
  
	case GF_FOP_LOOKUP:
	{
		if (!stub->args.lookup_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.lookup_cbk.op_ret,
				      stub->args.lookup_cbk.op_errno,
				      stub->args.lookup_cbk.inode,
				      &stub->args.lookup_cbk.buf,
                                      stub->args.lookup_cbk.dict,
                                      &stub->args.lookup_cbk.postparent);
		else
			stub->args.lookup_cbk.fn (stub->frame, 
						  stub->frame->cookie,
						  stub->frame->this,
						  stub->args.lookup_cbk.op_ret,
						  stub->args.lookup_cbk.op_errno,
						  stub->args.lookup_cbk.inode,
                                                  &stub->args.lookup_cbk.buf,
                                                  stub->args.lookup_cbk.dict,
                                                  &stub->args.lookup_cbk.postparent);
		/* FIXME NULL should not be passed */

		if (stub->args.lookup_cbk.dict)
			dict_unref (stub->args.lookup_cbk.dict);
		if (stub->args.lookup_cbk.inode)
			inode_unref (stub->args.lookup_cbk.inode);

		break;
	}
	case GF_FOP_SETDENTS:
	{
		if (!stub->args.setdents_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.setdents_cbk.op_ret,
				      stub->args.setdents_cbk.op_errno);
		else
			stub->args.setdents_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.setdents_cbk.op_ret,
						    stub->args.setdents_cbk.op_errno);
		break;
	}

	case GF_FOP_CHECKSUM:
	{
		if (!stub->args.checksum_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.checksum_cbk.op_ret,
				      stub->args.checksum_cbk.op_errno,
				      stub->args.checksum_cbk.file_checksum,
				      stub->args.checksum_cbk.dir_checksum);
		else
			stub->args.checksum_cbk.fn (stub->frame, 
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.checksum_cbk.op_ret, 
						    stub->args.checksum_cbk.op_errno,
						    stub->args.checksum_cbk.file_checksum,
						    stub->args.checksum_cbk.dir_checksum);
		if (stub->args.checksum_cbk.op_ret >= 0)
		{
			FREE (stub->args.checksum_cbk.file_checksum);
			FREE (stub->args.checksum_cbk.dir_checksum);
		}

		break;
	}

	case GF_FOP_RCHECKSUM:
	{
		if (!stub->args.rchecksum_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.rchecksum_cbk.op_ret,
				      stub->args.rchecksum_cbk.op_errno,
				      stub->args.rchecksum_cbk.weak_checksum,
				      stub->args.rchecksum_cbk.strong_checksum);
		else
			stub->args.rchecksum_cbk.fn (stub->frame, 
                                                     stub->frame->cookie,
                                                     stub->frame->this,
                                                     stub->args.rchecksum_cbk.op_ret, 
                                                     stub->args.rchecksum_cbk.op_errno,
                                                     stub->args.rchecksum_cbk.weak_checksum,
                                                     stub->args.rchecksum_cbk.strong_checksum);
		if (stub->args.rchecksum_cbk.op_ret >= 0)
		{
			FREE (stub->args.rchecksum_cbk.strong_checksum);
		}

		break;
	}

	case GF_FOP_READDIR:
	{
		if (!stub->args.readdir_cbk.fn) 
			STACK_UNWIND (stub->frame,
				      stub->args.readdir_cbk.op_ret,
				      stub->args.readdir_cbk.op_errno,
				      &stub->args.readdir_cbk.entries);
		else 
			stub->args.readdir_cbk.fn (stub->frame,
						   stub->frame->cookie,
						   stub->frame->this,
						   stub->args.readdir_cbk.op_ret,
						   stub->args.readdir_cbk.op_errno,
						   &stub->args.readdir_cbk.entries);
		
		if (stub->args.readdir_cbk.op_ret > 0) 
			gf_dirent_free (&stub->args.readdir_cbk.entries);

		break;
	}

        case GF_FOP_READDIRP:
	{
		if (!stub->args.readdirp_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.readdirp_cbk.op_ret,
				      stub->args.readdirp_cbk.op_errno,
				      &stub->args.readdirp_cbk.entries);
		else
			stub->args.readdirp_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.readdirp_cbk.op_ret,
						    stub->args.readdirp_cbk.op_errno,
						    &stub->args.readdirp_cbk.entries);

		if (stub->args.readdirp_cbk.op_ret > 0)
			gf_dirent_free (&stub->args.readdirp_cbk.entries);

		break;
	}

	case GF_FOP_XATTROP:
	{
		if (!stub->args.xattrop_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.xattrop_cbk.op_ret,
				      stub->args.xattrop_cbk.op_errno,
				      stub->args.xattrop_cbk.xattr);
		else
			stub->args.xattrop_cbk.fn (stub->frame,
						   stub->frame->cookie,
						   stub->frame->this,
						   stub->args.xattrop_cbk.op_ret,
						   stub->args.xattrop_cbk.op_errno,
						   stub->args.xattrop_cbk.xattr);

		if (stub->args.xattrop_cbk.xattr)
			dict_unref (stub->args.xattrop_cbk.xattr);

		break;
	}
	case GF_FOP_FXATTROP:
	{
		if (!stub->args.fxattrop_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.fxattrop_cbk.op_ret,
				      stub->args.fxattrop_cbk.op_errno,
				      stub->args.fxattrop_cbk.xattr);
		else
			stub->args.fxattrop_cbk.fn (stub->frame,
						    stub->frame->cookie,
						    stub->frame->this,
						    stub->args.fxattrop_cbk.op_ret,
						    stub->args.fxattrop_cbk.op_errno,
						    stub->args.fxattrop_cbk.xattr);

		if (stub->args.fxattrop_cbk.xattr)
			dict_unref (stub->args.fxattrop_cbk.xattr);

		break;
	}
	case GF_FOP_LOCK_NOTIFY:
	{
		if (!stub->args.lock_notify_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.lock_notify_cbk.op_ret,
				      stub->args.lock_notify_cbk.op_errno);
		else
			stub->args.lock_notify_cbk.fn (stub->frame,
						       stub->frame->cookie,
						       stub->frame->this,
						       stub->args.lock_notify_cbk.op_ret,
						       stub->args.lock_notify_cbk.op_errno);
		break;
	}
	case GF_FOP_LOCK_FNOTIFY:
	{
		if (!stub->args.lock_fnotify_cbk.fn)
			STACK_UNWIND (stub->frame,
				      stub->args.lock_fnotify_cbk.op_ret,
				      stub->args.lock_fnotify_cbk.op_errno);
		else
			stub->args.lock_fnotify_cbk.fn (stub->frame,
							stub->frame->cookie,
							stub->frame->this,
							stub->args.lock_fnotify_cbk.op_ret,
							stub->args.lock_fnotify_cbk.op_errno);
		break;
	}
        case GF_FOP_SETATTR:
        {
                if (!stub->args.setattr_cbk.fn)
                        STACK_UNWIND (stub->frame,
                                      stub->args.setattr_cbk.op_ret,
                                      stub->args.setattr_cbk.op_errno,
                                      &stub->args.setattr_cbk.statpre,
                                      &stub->args.setattr_cbk.statpost);
                else
                        stub->args.setattr_cbk.fn (
                                stub->frame,
                                stub->frame->cookie,
                                stub->frame->this,
                                stub->args.setattr_cbk.op_ret,
                                stub->args.setattr_cbk.op_errno,
                                &stub->args.setattr_cbk.statpre,
                                &stub->args.setattr_cbk.statpost);
                break;
        }
        case GF_FOP_FSETATTR:
        {
                if (!stub->args.fsetattr_cbk.fn)
                        STACK_UNWIND (stub->frame,
                                      stub->args.fsetattr_cbk.op_ret,
                                      stub->args.fsetattr_cbk.op_errno,
                                      &stub->args.fsetattr_cbk.statpre,
                                      &stub->args.fsetattr_cbk.statpost);
                else
                        stub->args.fsetattr_cbk.fn (
                                stub->frame,
                                stub->frame->cookie,
                                stub->frame->this,
                                stub->args.fsetattr_cbk.op_ret,
                                stub->args.fsetattr_cbk.op_errno,
                                &stub->args.fsetattr_cbk.statpre,
                                &stub->args.fsetattr_cbk.statpost);
                break;
        }
	case GF_FOP_MAXVALUE:
	{
		gf_log ("call-stub",
			GF_LOG_DEBUG,
			"Invalid value of FOP");
	}
	break;
	}
out:
	return;
}


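/* Release the references held by a wind-side stub that is being destroyed:
 * loc_wipe()s, fd_unref()s, dict_unref()s and FREE()s of duplicated strings,
 * mirroring what the corresponding fop_*_stub () constructor acquired. */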
static void
call_stub_destroy_wind (call_stub_t *stub)
{
	switch (stub->fop) {
	case GF_FOP_OPEN:
	{
		loc_wipe (&stub->args.open.loc);
		if (stub->args.open.fd)
			fd_unref (stub->args.open.fd);
		break;
	}
	case GF_FOP_CREATE:
	{
		loc_wipe (&stub->args.create.loc);
		if (stub->args.create.fd)
			fd_unref (stub->args.create.fd);
		break;
	}
	case GF_FOP_STAT:
	{
		loc_wipe (&stub->args.stat.loc);
		break;
	}
	case GF_FOP_READLINK:
	{
		loc_wipe (&stub->args.readlink.loc);
		break;
	}
  
	case GF_FOP_MKNOD:
	{
		loc_wipe (&stub->args.mknod.loc);
	}
	break;
  
	case GF_FOP_MKDIR:
	{
		loc_wipe (&stub->args.mkdir.loc);
	}
	break;
  
	case GF_FOP_UNLINK:
	{
		loc_wipe (&stub->args.unlink.loc);
	}
	break;

	case GF_FOP_RMDIR:
	{
		loc_wipe (&stub->args.rmdir.loc);
	}
	break;
      
	case GF_FOP_SYMLINK:
	{
		FREE (stub->args.symlink.linkname);
		loc_wipe (&stub->args.symlink.loc);
	}
	break;
  
	case GF_FOP_RENAME:
	{
		loc_wipe (&stub->args.rename.old);
		loc_wipe (&stub->args.rename.new);
	}
	break;

	case GF_FOP_LINK:
	{
		loc_wipe (&stub->args.link.oldloc);
		loc_wipe (&stub->args.link.newloc);
	}
	break;
  
	case GF_FOP_TRUNCATE:
	{
		loc_wipe (&stub->args.truncate.loc);
		break;
	}
      
	case GF_FOP_READ:
	{
		if (stub->args.readv.fd)
			fd_unref (stub->args.readv.fd);
		break;
	}
  
	case GF_FOP_WRITE:
	{
		struct iobref *iobref = stub->args.writev.iobref;
		if (stub->args.writev.fd)
			fd_unref (stub->args.writev.fd);
		FREE (stub->args.writev.vector);
		if (iobref)
			iobref_unref (iobref);
		break;
	}
  
	case GF_FOP_STATFS:
	{
		loc_wipe (&stub->args.statfs.loc);
		break;
	}
	case GF_FOP_FLUSH:
	{
		if (stub->args.flush.fd)
			fd_unref (stub->args.flush.fd);      
		break;
	}
  
	case GF_FOP_FSYNC:
	{
		if (stub->args.fsync.fd)
			fd_unref (stub->args.fsync.fd);
		break;
	}

	case GF_FOP_SETXATTR:
	{
		loc_wipe (&stub->args.setxattr.loc);
		if (stub->args.setxattr.dict)
			dict_unref (stub->args.setxattr.dict);
		break;
	}
  
	case GF_FOP_GETXATTR:
	{
		if (stub->args.getxattr.name)
			FREE (stub->args.getxattr.name);
		loc_wipe (&stub->args.getxattr.loc);
		break;
	}

	case GF_FOP_FSETXATTR:
	{
		fd_unref (stub->args.fsetxattr.fd);
		if (stub->args.fsetxattr.dict)
			dict_unref (stub->args.fsetxattr.dict);
		break;
	}

	case GF_FOP_FGETXATTR:
	{
		if (stub->args.fgetxattr.name)
			FREE (stub->args.fgetxattr.name);
		fd_unref (stub->args.fgetxattr.fd);
		break;
	}

	case GF_FOP_REMOVEXATTR:
	{
		loc_wipe (&stub->args.removexattr.loc);
		FREE (stub->args.removexattr.name);
		break;
	}

	case GF_FOP_OPENDIR:
	{
		loc_wipe (&stub->args.opendir.loc);
		if (stub->args.opendir.fd)
			fd_unref (stub->args.opendir.fd);
		break;
	}

	case GF_FOP_GETDENTS:
	{
		if (stub->args.getdents.fd)
			fd_unref (stub->args.getdents.fd);
		break;
	}

	case GF_FOP_FSYNCDIR:
	{
		if (stub->args.fsyncdir.fd)
			fd_unref (stub->args.fsyncdir.fd);
		break;
	}
  
	case GF_FOP_ACCESS:
	{
		loc_wipe (&stub->args.access.loc);
		break;
	}
  
	case GF_FOP_FTRUNCATE:
	{
		if (stub->args.ftruncate.fd)
			fd_unref (stub->args.ftruncate.fd);
		break;
	}
  
	case GF_FOP_FSTAT:
	{
		if (stub->args.fstat.fd)
			fd_unref (stub->args.fstat.fd);
		break;
	}
  
	case GF_FOP_LK:
	{
		if (stub->args.lk.fd)
			fd_unref (stub->args.lk.fd);
		break;
	}

	case GF_FOP_INODELK:
	{
                if (stub->args.inodelk.volume)
                        FREE (stub->args.inodelk.volume);

		loc_wipe (&stub->args.inodelk.loc);
		break;
	}
	case GF_FOP_FINODELK:
	{
                if (stub->args.finodelk.volume)
                        FREE (stub->args.finodelk.volume);

		if (stub->args.finodelk.fd)
			fd_unref (stub->args.finodelk.fd);
		break;
	}
	case GF_FOP_ENTRYLK:
	{
                if (stub->args.entrylk.volume)
                        FREE (stub->args.entrylk.volume);

		if (stub->args.entrylk.name)
			FREE (stub->args.entrylk.name);
		loc_wipe (&stub->args.entrylk.loc);
		break;
	}
	case GF_FOP_FENTRYLK:
	{
                if (stub->args.fentrylk.volume)
                        FREE (stub->args.fentrylk.volume);

		if (stub->args.fentrylk.name)
			FREE (stub->args.fentrylk.name);

 		if (stub->args.fentrylk.fd)
			fd_unref (stub->args.fentrylk.fd);
		break;
	}
  
	case GF_FOP_LOOKUP:
	{
		loc_wipe (&stub->args.lookup.loc);
		if (stub->args.lookup.xattr_req)
			dict_unref (stub->args.lookup.xattr_req);
		break;
	}

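	/* setdents keeps a private copy of the dirent chain; walk the list
	 * and free every entry along with its name. */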
	case GF_FOP_SETDENTS:
	{
		dir_entry_t *entry, *next;
		if (stub->args.setdents.fd)
			fd_unref (stub->args.setdents.fd);
		entry = stub->args.setdents.entries.next;
		while (entry) {
			next = entry->next;
			FREE (entry->name);
			FREE (entry);
			entry = next;
		}
		break;
	}

	case GF_FOP_CHECKSUM:
	{
		loc_wipe (&stub->args.checksum.loc);
		break;
	}

	case GF_FOP_RCHECKSUM:
	{
                if (stub->args.rchecksum.fd)
                        fd_unref (stub->args.rchecksum.fd);
		break;
	}

	case GF_FOP_READDIR:
	{
		if (stub->args.readdir.fd)
			fd_unref (stub->args.readdir.fd);
		break;
	}

        case GF_FOP_READDIRP:
	{
		if (stub->args.readdirp.fd)
			fd_unref (stub->args.readdirp.fd);
		break;
	}

	case GF_FOP_XATTROP:
	{
		loc_wipe (&stub->args.xattrop.loc);
		if (stub->args.xattrop.xattr)
			dict_unref (stub->args.xattrop.xattr);
		break;
	}
	case GF_FOP_FXATTROP:
	{
		if (stub->args.fxattrop.fd)
			fd_unref (stub->args.fxattrop.fd);
		if (stub->args.fxattrop.xattr)
			dict_unref (stub->args.fxattrop.xattr);
		break;
	}
	case GF_FOP_LOCK_NOTIFY:
	{
		loc_wipe (&stub->args.lock_notify.loc);
		break;
	}
	case GF_FOP_LOCK_FNOTIFY:
	{
		if (stub->args.lock_fnotify.fd)
			fd_unref (stub->args.lock_fnotify.fd);
		break;
	}
        case GF_FOP_SETATTR:
        {
                loc_wipe (&stub->args.setattr.loc);
                break;
        }
        case GF_FOP_FSETATTR:
        {
                if (stub->args.fsetattr.fd)
                        fd_unref (stub->args.fsetattr.fd);
                break;
        }
	case GF_FOP_MAXVALUE:
	{
		gf_log ("call-stub",
			GF_LOG_DEBUG,
			"Invalid value of FOP");
	}
	break;
	default:
		break;
	}
}


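/*
 * Release the references held by a stub's callback (unwind) arguments:
 * fds, inodes, dicts, dirent lists and result buffers copied into
 * args.*_cbk when the stub was filled.
 */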
static void
call_stub_destroy_unwind (call_stub_t *stub)
{
	switch (stub->fop) {
	case GF_FOP_OPEN:
	{
		if (stub->args.open_cbk.fd) 
			fd_unref (stub->args.open_cbk.fd);
	}
	break;

	case GF_FOP_CREATE:
	{
		if (stub->args.create_cbk.fd) 
			fd_unref (stub->args.create_cbk.fd);

		if (stub->args.create_cbk.inode)
			inode_unref (stub->args.create_cbk.inode);
	}
	break;

	case GF_FOP_STAT:
		break;

	case GF_FOP_READLINK:
	{
		if (stub->args.readlink_cbk.buf) 
			FREE (stub->args.readlink_cbk.buf);
	}
	break;
  
	case GF_FOP_MKNOD:
	{
		if (stub->args.mknod_cbk.inode)
			inode_unref (stub->args.mknod_cbk.inode);
	}
	break;
  
	case GF_FOP_MKDIR:
	{
		if (stub->args.mkdir_cbk.inode)
			inode_unref (stub->args.mkdir_cbk.inode);
	}
	break;
  
	case GF_FOP_UNLINK:
		break;

	case GF_FOP_RMDIR:
		break;
      
	case GF_FOP_SYMLINK:
	{
		if (stub->args.symlink_cbk.inode) 
			inode_unref (stub->args.symlink_cbk.inode);
	}
	break;
  
	case GF_FOP_RENAME:
		break;

	case GF_FOP_LINK:
	{
		if (stub->args.link_cbk.inode)
			inode_unref (stub->args.link_cbk.inode);
	}
	break;
  
	case GF_FOP_TRUNCATE:
		break;

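	/* The readv callback only carries a vector and iobref on success,
	 * so free them only when op_ret is non-negative. */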
	case GF_FOP_READ:
	{
		if (stub->args.readv_cbk.op_ret >= 0) {
			struct iobref *iobref = stub->args.readv_cbk.iobref;
			FREE (stub->args.readv_cbk.vector);
			
			if (iobref) {
				iobref_unref (iobref);
			}
		}
	}
	break;

	case GF_FOP_WRITE:
		break;
  
	case GF_FOP_STATFS:
		break;

	case GF_FOP_FLUSH:
		break;
  
	case GF_FOP_FSYNC:
		break;

	case GF_FOP_SETXATTR:
		break;
  
	case GF_FOP_GETXATTR:
	{
		if (stub->args.getxattr_cbk.dict)
			dict_unref (stub->args.getxattr_cbk.dict);
	}
	break;

	case GF_FOP_FSETXATTR:
		break;

	case GF_FOP_FGETXATTR:
	{
		if (stub->args.fgetxattr_cbk.dict)
			dict_unref (stub->args.fgetxattr_cbk.dict);
	}
	break;

	case GF_FOP_REMOVEXATTR:
		break;

	case GF_FOP_OPENDIR:
	{
		if (stub->args.opendir_cbk.fd)
			fd_unref (stub->args.opendir_cbk.fd);
	}
	break;

	case GF_FOP_GETDENTS:
	{
		dir_entry_t *tmp = NULL, *entries = NULL;

		entries = &stub->args.getdents_cbk.entries;
		if (stub->args.getdents_cbk.op_ret >= 0) {
			while (entries->next) {
				tmp = entries->next;
				entries->next = entries->next->next;
				FREE (tmp->name);
				FREE (tmp);
			}
		}
	}
	break;

	case GF_FOP_FSYNCDIR:
		break;
  
	case GF_FOP_ACCESS:
		break;
  
	case GF_FOP_FTRUNCATE:
		break;
  
	case GF_FOP_FSTAT:
		break;
  
	case GF_FOP_LK:
		break;

	case GF_FOP_INODELK:
		break;

	case GF_FOP_FINODELK:
		break;

	case GF_FOP_ENTRYLK:
		break;

	case GF_FOP_FENTRYLK:
		break;

	case GF_FOP_LOOKUP:
	{
		if (stub->args.lookup_cbk.inode)
			inode_unref (stub->args.lookup_cbk.inode);

		if (stub->args.lookup_cbk.dict)
			dict_unref (stub->args.lookup_cbk.dict);
	}
	break;

	case GF_FOP_SETDENTS:
		break;

	case GF_FOP_CHECKSUM:
	{
		if (stub->args.checksum_cbk.op_ret >= 0) {
			FREE (stub->args.checksum_cbk.file_checksum);
			FREE (stub->args.checksum_cbk.dir_checksum); 
		}
	}
  	break;

	case GF_FOP_RCHECKSUM:
	{
		if (stub->args.rchecksum_cbk.op_ret >= 0) {
			FREE (stub->args.rchecksum_cbk.strong_checksum); 
		}
	}
  	break;

	case GF_FOP_READDIR:
	{
		if (stub->args.readdir_cbk.op_ret > 0) {
			gf_dirent_free (&stub->args.readdir_cbk.entries);
		}
	}
	break;

        case GF_FOP_READDIRP:
	{
		if (stub->args.readdirp_cbk.op_ret > 0) {
			gf_dirent_free (&stub->args.readdirp_cbk.entries);
		}
	}
	break;

	case GF_FOP_XATTROP:
	{
		if (stub->args.xattrop_cbk.xattr)
			dict_unref (stub->args.xattrop_cbk.xattr);
	}
	break;

	case GF_FOP_FXATTROP:
	{
		if (stub->args.fxattrop_cbk.xattr) 
			dict_unref (stub->args.fxattrop_cbk.xattr);
	}
	break;
        
        case GF_FOP_SETATTR:
        {
                break;
        }

        case GF_FOP_FSETATTR:
        {
                break;
        }

	case GF_FOP_MAXVALUE:
	{
		gf_log ("call-stub",
			GF_LOG_DEBUG,
			"Invalid value of FOP");
	}
	break;

	default:
		break;
	}
}

 
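/*
 * Free a stub along with whatever it still references: wind stubs drop
 * their copied fop arguments, unwind stubs drop their copied callback
 * results, then the stub itself is freed.
 */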
void
call_stub_destroy (call_stub_t *stub)
{
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
	
	if (stub->wind) {
		call_stub_destroy_wind (stub);
	} else {
		call_stub_destroy_unwind (stub);
	}

	FREE (stub);
out:
	return;
}

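/*
 * Resume a parked stub: unlink it from its list, switch THIS to the
 * xlator the stub's frame belongs to, replay either the wind or the
 * unwind path, and finally destroy the stub.
 */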
void
call_resume (call_stub_t *stub)
{
        xlator_t *old_THIS = NULL;

	errno = EINVAL;
	GF_VALIDATE_OR_GOTO ("call-stub", stub, out);

	list_del_init (&stub->list);

        old_THIS = THIS;
        THIS = stub->frame->this;
        {
                if (stub->wind)
                        call_resume_wind (stub);
                else
                        call_resume_unwind (stub);
        }
        THIS = old_THIS;

	call_stub_destroy (stub);
out:
	return;
}


-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/COPYING-GPLV2 b/COPYING-GPLV2
new file mode 100644
index 000000000..d159169d1
--- /dev/null
+++ b/COPYING-GPLV2
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/COPYING-LGPLV3 b/COPYING-LGPLV3
new file mode 100644
index 000000000..65c5ca88a
--- /dev/null
+++ b/COPYING-LGPLV3
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/EXCEPTIONS b/EXCEPTIONS
deleted file mode 100644
index 4f1845cd4..000000000
--- a/EXCEPTIONS
+++ /dev/null
@@ -1,14 +0,0 @@
-GlusterFS exceptions to GNU GPL
-
-Copyright © 2011 Gluster, Inc.
-
-Everyone is permitted to copy and distribute verbatim copies of this license
-document, but changing it is not allowed.
-Gluster has developed the APIs and the libraries in its code to enable third
-parties to link their software to GlusterFS software without requiring that
-such third party software be licensed under the GNU General Public License
-version 3 ("GNU GPL3"). Consequently, Gluster does not view the linking of
-third party software to GlusterFS through APIs or the libraries as requiring
-the combination to be distributed under GNU GPLv3. However, your reproduction,
-modification or other use of the GlusterFS software alone must comply with the
-terms of GNU GPL3.
diff --git a/Makefile.am b/Makefile.am
index 6a5bc0251..598ebb410 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,8 +1,18 @@
-EXTRA_DIST = autogen.sh COPYING INSTALL README AUTHORS THANKS NEWS EXCEPTIONS glusterfs.spec
+EXTRA_DIST = autogen.sh \
+ COPYING-GPLV2 COPYING-LGPLV3 \
+ INSTALL README AUTHORS THANKS NEWS \
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
+ error-codes.json gf-error-codes.h.template \
+ gen-headers.py run-tests.sh \
+ $(shell find $(top_srcdir)/tests -type f -print)
-SUBDIRS = argp-standalone libglusterfs rpc xlators glusterfsd $(FUSERMOUNT_SUBDIR) doc extras cli
+SUBDIRS = argp-standalone libglusterfs rpc api xlators glusterfsd \
+ $(FUSERMOUNT_SUBDIR) doc extras cli @SYNCDAEMON_SUBDIR@
-CLEANFILES =
+pkgconfigdir = @pkgconfigdir@
+pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
+
+CLEANFILES =
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
@@ -11,4 +21,9 @@ gitclean: distclean
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
rm -fr argp-standalone/autom4te.cache
- rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in argp-standalone/configure argp-standalone/depcomp argp-standalone/install-sh argp-standalone/missing
+ rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in
+ rm -f argp-standalone/configure argp-standalone/depcomp
+ rm -f argp-standalone/install-sh argp-standalone/missing
+
+dist-hook:
+ (cd $(srcdir) && git diff && echo ===== git log ==== && git log) > $(distdir)/ChangeLog
diff --git a/THANKS b/THANKS
index ea9816662..e1ce5105d 100644
--- a/THANKS
+++ b/THANKS
@@ -1,3 +1 @@
-
-For all of you, who use the product and help us making it more robust, useful, popular.
-
+For all of you who use the product and help us make it more robust, useful and popular.
diff --git a/api/Makefile.am b/api/Makefile.am
new file mode 100644
index 000000000..f0ad1ee97
--- /dev/null
+++ b/api/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src examples
diff --git a/api/examples/Makefile.am b/api/examples/Makefile.am
new file mode 100644
index 000000000..05f40ff53
--- /dev/null
+++ b/api/examples/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_PROGRAMS = glfsxmp
+glfsxmp_SOURCES = glfsxmp.c
+glfsxmp_CFLAGS = $(GLFS_CFLAGS) -Wall
+glfsxmp_LDADD = $(GLFS_LIBS) -lrt
+
+EXTRA_DIST = gfapi.py
diff --git a/api/examples/README b/api/examples/README
new file mode 100644
index 000000000..4d2b521f7
--- /dev/null
+++ b/api/examples/README
@@ -0,0 +1,36 @@
+This is an example application which uses libgfapi. It is
+a complete autotools based build system which demonstrates the
+required changes in configure.ac, Makefile.am etc. to successfully
+detect and build an application against libgfapi.
+
+There are two approaches to building a libgfapi based application:
+
+1. In the presence of pkg-config in your build system.
+This is the recommended approach which is also used in this example.
+For this approach to work, you need to build glusterfs by passing
+--pkgconfigdir=/usr/lib64/pkgconfig (or the appropriate directory)
+in your distro. This already happens if you build RPMs with the
+glusterfs.spec provided in glusterfs.git. You will also need to
+install glusterfs-api RPM.
+
+2. In the absence of pkg-config in your build system.
+Make sure your LDFLAGS includes -L/path/to/lib where libgfapi.so is
+installed, and your CPPFLAGS includes -I/path/to/include/glusterfs where
+the 'api' directory containing the headers is available.
+
+glfsxmp.c
+=========
+
+glfsxmp.c is an example application which uses libgfapi.
+
+Compilation Steps For glfsxmp.c
+===============================
+
+1. $./autogen.sh
+2. $./configure
+
+Note: Before running ./configure, as mentioned above, you need to
+      take care of #1 or #2, i.e. set the pkg-config path, or the
+      LDFLAGS/CPPFLAGS and -I/<path>, to the correct values.
+
+3. $make glfsxmp
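
Beyond the C example (glfsxmp.c), this change also ships api/examples/gfapi.py,
a ctypes wrapper around libgfapi. As a rough sketch of how those bindings might
be driven once a volume is reachable (the server name, volume name and paths
below are placeholders, not values taken from this tree), a Python 2 snippet
along these lines could be used:

    #!/usr/bin/python
    # Hypothetical walk-through of the gfapi.py bindings; the host, volume
    # and paths are made up for illustration only.
    import os
    from gfapi import Volume

    vol = Volume("gluster.example.com", "testvol")  # placeholder volfile server and volume
    vol.set_logging("/dev/stderr", 7)               # verbose logging to stderr
    vol.mount()                                     # wraps glfs_init()

    fd = vol.creat("/hello.txt", os.O_WRONLY | os.O_EXCL, 0644)
    if fd:
        fd.write("hello from libgfapi\n")           # returns number of bytes written
        fd = None                                   # dropping the File object runs glfs_close()

    fd = vol.open("/hello.txt", os.O_RDONLY)
    if fd:
        print fd.read(4096)                         # data on success, rc < 0 on error

Error handling is intentionally thin here: as the File and Volume classes in
gfapi.py below show, creat() and open() return a false value on failure, and
the read/write wrappers simply pass through the return code of the underlying
glfs_* call.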
diff --git a/api/examples/autogen.sh b/api/examples/autogen.sh
new file mode 100755
index 000000000..1fee6be11
--- /dev/null
+++ b/api/examples/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+aclocal
+autoconf
+automake --foreign
diff --git a/api/examples/configure.ac b/api/examples/configure.ac
new file mode 100644
index 000000000..b80177a4e
--- /dev/null
+++ b/api/examples/configure.ac
@@ -0,0 +1,12 @@
+
+AC_INIT([glfs-test],[0.1],[gluster-devel@nongnu.org])
+
+AM_INIT_AUTOMAKE
+
+AC_CONFIG_FILES([Makefile])
+
+AC_PROG_CC
+
+PKG_CHECK_MODULES([GLFS], [glusterfs-api >= 3])
+
+AC_OUTPUT
diff --git a/api/examples/gfapi.py b/api/examples/gfapi.py
new file mode 100755
index 000000000..3ac67f4d5
--- /dev/null
+++ b/api/examples/gfapi.py
@@ -0,0 +1,422 @@
+#!/usr/bin/python
+
+from ctypes import *
+from ctypes.util import find_library
+import os
+import sys
+import time
+import types
+
+# Looks like ctypes is having trouble with dependencies, so just force them to
+# load with RTLD_GLOBAL until I figure that out.
+glfs = CDLL(find_library("glusterfs"),RTLD_GLOBAL)
+xdr = CDLL(find_library("gfxdr"),RTLD_GLOBAL)
+api = CDLL(find_library("gfapi"),RTLD_GLOBAL)
+
+# Wow, the Linux kernel folks really play nasty games with this structure. If
+# you look at the man page for stat(2) and then at this definition you'll note
+# two discrepancies. First, we seem to have st_nlink and st_mode reversed. In
+# fact that's exactly how they're defined *for 64-bit systems*; for 32-bit
+# they're in the man-page order. Even uglier, the man page makes no mention of
+# the *nsec fields, but they are very much present and if they're not included
+# then we get memory corruption because libgfapi has a structure definition
+# that's longer than ours and they overwrite some random bit of memory after
+# the space we allocated. Yes, that's all very disgusting, and I'm still not
+# sure this will really work on 32-bit because all of the field types are so
+# obfuscated behind macros and feature checks.
+class Stat (Structure):
+ _fields_ = [
+ ("st_dev", c_ulong),
+ ("st_ino", c_ulong),
+ ("st_nlink", c_ulong),
+ ("st_mode", c_uint),
+ ("st_uid", c_uint),
+ ("st_gid", c_uint),
+ ("st_rdev", c_ulong),
+ ("st_size", c_ulong),
+ ("st_blksize", c_ulong),
+ ("st_blocks", c_ulong),
+ ("st_atime", c_ulong),
+ ("st_atimensec", c_ulong),
+ ("st_mtime", c_ulong),
+ ("st_mtimensec", c_ulong),
+ ("st_ctime", c_ulong),
+ ("st_ctimensec", c_ulong),
+ ]
+api.glfs_creat.restype = c_void_p
+api.glfs_open.restype = c_void_p
+api.glfs_lstat.restype = c_int
+api.glfs_lstat.argtypes = [c_void_p, c_char_p, POINTER(Stat)]
+
+class Dirent (Structure):
+ _fields_ = [
+ ("d_ino", c_ulong),
+ ("d_off", c_ulong),
+ ("d_reclen", c_ushort),
+ ("d_type", c_char),
+ ("d_name", c_char * 256),
+ ]
+api.glfs_opendir.restype = c_void_p
+api.glfs_readdir_r.restype = c_int
+api.glfs_readdir_r.argtypes = [c_void_p, POINTER(Dirent),
+ POINTER(POINTER(Dirent))]
+
+# There's a bit of ctypes glitchiness around __del__ functions and module-level
+# variables. If we unload the module while we still have references to File or
+# Volume objects, the module-level variables might have disappeared by the time
+# __del__ gets called. Therefore the objects hold references which they
+# release when __del__ is done. We only actually use the object-local values
+# in __del__; for clarity, we just use the simpler module-level form elsewhere.
+
+class File(object):
+
+ def __init__ (self, fd):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fd = fd
+
+ def __del__ (self):
+ self._api.glfs_close(self.fd)
+ self._api = None
+
+ # File operations, in alphabetical order.
+
+ def fsync (self):
+ return api.glfs_fsync(self.fd)
+
+ def read (self, buflen, flags=0):
+ rbuf = create_string_buffer(buflen)
+ rc = api.glfs_read(self.fd,rbuf,buflen,flags)
+ if rc > 0:
+ return rbuf.value[:rc]
+ else:
+ return rc
+
+ def read_buffer (self, buf, flags=0):
+ return api.glfs_read(self.fd,buf,len(buf),flags)
+
+ def write (self, data, flags=0):
+ return api.glfs_write(self.fd,data,len(data),flags)
+
+ def fallocate (self, mode, offset, len):
+ return api.glfs_fallocate(self.fd, mode, offset, len)
+
+ def discard (self, offset, len):
+ return api.glfs_discard(self.fd, offset, len)
+
+
+class Dir(object):
+
+ def __init__ (self, fd):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fd = fd
+ self.cursor = POINTER(Dirent)()
+
+ def __del__ (self):
+ self._api.glfs_closedir(self.fd)
+ self._api = None
+
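+ # Return the next Dirent, or an integer: 0 when the end of the
+ # directory has been reached, negative on error.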
+ def next (self):
+ entry = Dirent()
+ entry.d_reclen = 256
+ rc = api.glfs_readdir_r(self.fd,byref(entry),byref(self.cursor))
+ if (rc < 0) or (not self.cursor) or (not self.cursor.contents):
+ return rc
+ return entry
+
+class Volume(object):
+
+ # Housekeeping functions.
+
+ def __init__ (self, host, volid, proto="tcp", port=24007):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fs = api.glfs_new(volid)
+ api.glfs_set_volfile_server(self.fs,proto,host,port)
+
+ def __del__ (self):
+ self._api.glfs_fini(self.fs)
+ self._api = None
+
+ def set_logging (self, path, level):
+ api.glfs_set_logging(self.fs,path,level)
+
+ def mount (self):
+ api.glfs_init(self.fs)
+
+ # File operations, in alphabetical order.
+
+ def creat (self, path, flags, mode):
+ fd = api.glfs_creat(self.fs,path,flags,mode)
+ if not fd:
+ return fd
+ return File(fd)
+
+ def getxattr (self, path, key, maxlen):
+ buf = create_string_buffer(maxlen)
+ rc = api.glfs_getxattr(self.fs,path,key,buf,maxlen)
+ if rc < 0:
+ return rc
+ return buf.value[:rc]
+
+ def listxattr (self, path):
+ buf = create_string_buffer(512)
+ rc = api.glfs_listxattr(self.fs,path,buf,512)
+ if rc < 0:
+ return rc
+ xattrs = []
+ # Parsing character by character is ugly, but it seems like the
+ # easiest way to deal with the "strings separated by NUL in one
+ # buffer" format.
+ i = 0
+ while i < rc:
+ new_xa = buf.raw[i]
+ i += 1
+ while i < rc:
+ next_char = buf.raw[i]
+ i += 1
+ if next_char == '\0':
+ xattrs.append(new_xa)
+ break
+ new_xa += next_char
+ xattrs.sort()
+ return xattrs
+
+ def lstat (self, path):
+ x = Stat()
+ rc = api.glfs_lstat(self.fs,path,byref(x))
+ if rc >= 0:
+ return x
+ else:
+ return rc
+
+ def mkdir (self, path):
+ return api.glfs_mkdir(self.fs,path)
+
+ def open (self, path, flags):
+ fd = api.glfs_open(self.fs,path,flags)
+ if not fd:
+ return fd
+ return File(fd)
+
+ def opendir (self, path):
+ fd = api.glfs_opendir(self.fs,path)
+ if not fd:
+ return fd
+ return Dir(fd)
+
+ def rename (self, opath, npath):
+ return api.glfs_rename(self.fs,opath,npath)
+
+ def rmdir (self, path):
+ return api.glfs_rmdir(self.fs,path)
+
+ def setxattr (self, path, key, value, vlen):
+ return api.glfs_setxattr(self.fs,path,key,value,vlen,0)
+
+ def unlink (self, path):
+ return api.glfs_unlink(self.fs,path)
+
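+# A minimal usage sketch (assumes a volume named "testvol" is served from
+# localhost; the names below are illustrative only):
+#
+#   vol = Volume("localhost", "testvol")
+#   vol.mount()
+#   fd = vol.creat("/hello.txt", os.O_WRONLY | os.O_EXCL, 0644)
+#   fd.write("hello world")
+#   print vol.lstat("/hello.txt").st_size
+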
+if __name__ == "__main__":
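+ # Each test_* function below takes (vol, path, data) and returns a
+ # (passed, message) tuple; the driver at the bottom runs them in order.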
+ def test_create_write (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat error"
+ rc = fd.write(data)
+ if rc != len(data):
+ return False, "wrote %d/%d bytes" % (rc, len(data))
+ return True, "wrote %d bytes" % rc
+
+ # TBD: this test fails if we do create, open, write, read
+ def test_open_read (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.open(mypath,os.O_RDONLY)
+ if not fd:
+ return False, "open error"
+ dlen = len(data) * 2
+ buf = fd.read(dlen)
+ if type(buf) == types.IntType:
+ return False, "read error %d" % buf
+ if len(buf) != len(data):
+ return False, "read %d/%d bytes" % (len(buf), len(data))
+ return True, "read '%s'" % buf
+
+ def test_lstat (vol, path, data):
+ mypath = path + ".io"
+ sb = vol.lstat(mypath)
+ if type(sb) == types.IntType:
+ return False, "lstat error %d" % sb
+ if sb.st_size != len(data):
+ return False, "lstat size is %d, expected %d" % (
+ sb.st_size, len(data))
+ return True, "lstat got correct size %d" % sb.st_size
+
+ def test_rename (vol, path, data):
+ opath = path + ".io"
+ npath = path + ".tmp"
+ rc = vol.rename(opath,npath)
+ if rc < 0:
+ return False, "rename error %d" % rc
+ ofd = vol.open(opath,os.O_RDWR)
+ if isinstance(ofd,File):
+ return False, "old path working after rename"
+ nfd = vol.open(npath,os.O_RDWR)
+ if isinstance(nfd,File):
+ return False, "new path not working after rename"
+ return True, "rename worked"
+
+ def test_unlink (vol, path, data):
+ mypath = path + ".tmp"
+ rc = vol.unlink(mypath)
+ if rc < 0:
+ return False, "unlink error %d" % fd
+ fd = vol.open(mypath,os.O_RDWR)
+ if isinstance(fd,File):
+ return False, "path still usable after unlink"
+ return True, "unlink worked"
+
+ def test_mkdir (vol, path, data):
+ mypath = path + ".dir"
+ rc = vol.mkdir(mypath)
+ if rc < 0:
+ return False, "mkdir error %d" % rc
+ return True, "mkdir worked"
+
+ def test_create_in_dir (vol, path, data):
+ mypath = path + ".dir/probe"
+ fd = vol.creat(mypath,os.O_RDWR,0644)
+ if not isinstance(fd,File):
+ return False, "create (in dir) error"
+ return True, "create (in dir) worked"
+
+ def test_dir_listing (vol, path, data):
+ mypath = path + ".dir"
+ fd = vol.opendir(mypath)
+ if not isinstance(fd,Dir):
+ return False, "opendir error %d" % fd
+ files = []
+ while True:
+ ent = fd.next()
+ if not isinstance(ent,Dirent):
+ break
+ name = ent.d_name[:ent.d_reclen]
+ files.append(name)
+ if files != [".", "..", "probe"]:
+ return False, "wrong directory contents"
+ return True, "directory listing worked"
+
+ def test_unlink_in_dir (vol, path, data):
+ mypath = path + ".dir/probe"
+ rc = vol.unlink(mypath)
+ if rc < 0:
+ return False, "unlink (in dir) error %d" % rc
+ return True, "unlink (in dir) worked"
+
+ def test_rmdir (vol, path, data):
+ mypath = path + ".dir"
+ rc = vol.rmdir(mypath)
+ if rc < 0:
+ return False, "rmdir error %d" % rc
+ sb = vol.lstat(mypath)
+ if not isinstance(sb,Stat):
+ return False, "dir still there after rmdir"
+ return True, "rmdir worked"
+
+ def test_setxattr (vol, path, data):
+ mypath = path + ".xa"
+ fd = vol.creat(mypath,os.O_RDWR|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat (xattr test) error"
+ key1, key2 = "hello", "goodbye"
+ if vol.setxattr(mypath,"trusted.key1",key1,len(key1)) < 0:
+ return False, "setxattr (key1) error"
+ if vol.setxattr(mypath,"trusted.key2",key2,len(key2)) < 0:
+ return False, "setxattr (key2) error"
+ return True, "setxattr worked"
+
+ def test_getxattr (vol, path, data):
+ mypath = path + ".xa"
+ buf = vol.getxattr(mypath,"trusted.key1",32)
+ if type(buf) == types.IntType:
+ return False, "getxattr error"
+ if buf != "hello":
+ return False, "wrong getxattr value %s" % buf
+ return True, "getxattr worked"
+
+ def test_listxattr (vol, path, data):
+ mypath = path + ".xa"
+ xattrs = vol.listxattr(mypath)
+ if type(xattrs) == types.IntType:
+ return False, "listxattr error"
+ if xattrs != ["trusted.key1","trusted.key2"]:
+ return False, "wrong listxattr value %s" % repr(xattrs)
+ return True, "listxattr worked"
+
+ def test_fallocate (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat error"
+ rc = fd.fallocate(0, 0, 1024*1024)
+ if rc != 0:
+ return False, "fallocate error"
+ rc = fd.discard(4096, 4096)
+ if rc != 0:
+ return False, "discard error"
+ return True, "fallocate/discard worked"
+
+ test_list = (
+ test_create_write,
+ test_open_read,
+ test_lstat,
+ test_rename,
+ test_unlink,
+ test_mkdir,
+ test_create_in_dir,
+ test_dir_listing,
+ test_unlink_in_dir,
+ test_rmdir,
+ test_setxattr,
+ test_getxattr,
+ test_listxattr,
+ test_fallocate,
+ )
+
+ ok_to_fail = (
+ # TBD: this fails opening the new file, even though the file
+ # did get renamed. Looks like a gfapi bug, not ours.
+ (test_rename, "new path not working after rename"),
+ # TBD: similar, call returns error even though it worked
+ (test_rmdir, "dir still there after rmdir"),
+ )
+
+ volid, path = sys.argv[1:3]
+ data = "fubar"
+ vol = Volume("localhost",volid)
+ vol.set_logging("/dev/null",7)
+ #vol.set_logging("/dev/stderr",7)
+ vol.mount()
+
+ failures = 0
+ expected = 0
+ for t in test_list:
+ rc, msg = t(vol,path,data)
+ if rc:
+ print "PASS: %s" % msg
+ else:
+ print "FAIL: %s" % msg
+ failures += 1
+ for otf in ok_to_fail:
+ if (t == otf[0]) and (msg == otf[1]):
+ print " (skipping known failure)"
+ expected += 1
+ break # from the *inner* for loop
+ else:
+ break # from the *outer* for loop
+
+ print "%d failures (%d expected)" % (failures, expected)
diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
new file mode 100644
index 000000000..600d72fb5
--- /dev/null
+++ b/api/examples/glfsxmp.c
@@ -0,0 +1,1598 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "api/glfs.h"
+#include "api/glfs-handles.h"
+#include <string.h>
+#include <time.h>
+
+
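+/* List the entries of the volume root with glfs_opendir/glfs_readdir_r and
+   print each name along with the directory offset from glfs_telldir. */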
+int
+test_dirops (glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir (fs, "/");
+ if (!fd) {
+ fprintf (stderr, "/: %s\n", strerror (errno));
+ return -1;
+ }
+
+ fprintf (stderr, "Entries:\n");
+ while (glfs_readdir_r (fd, (struct dirent *)buf, &entry), entry) {
+ fprintf (stderr, "%s: %lu\n", entry->d_name, glfs_telldir (fd));
+ }
+
+ glfs_closedir (fd);
+ return 0;
+}
+
+
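+/* Set two user xattrs on an existing file, then list them back and print
+   each key from the NUL-separated result buffer. */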
+int
+test_xattr (glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr (fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf (stderr, "setxattr(%s): %d (%s)\n", filename, ret,
+ strerror (errno));
+
+ ret = glfs_setxattr (fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf (stderr, "setxattr(%s): %d (%s)\n", filename, ret,
+ strerror (errno));
+
+ ret = glfs_listxattr (fs, filename, buf, 512);
+ fprintf (stderr, "listxattr(%s): %d (%s)\n", filename, ret,
+ strerror (errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf ("key=%s\n", ptr);
+ ptr += strlen (ptr);
+ }
+
+ return 0;
+}
+
+
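+/* Exercise glfs_mkdir, glfs_symlink, glfs_chdir, glfs_getcwd and
+   glfs_realpath by walking into a directory through a symlink. */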
+int
+test_chdir (glfs_t *fs)
+{
+ int ret = -1;
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir (fs, topdir, 0755);
+ if (ret) {
+ fprintf (stderr, "mkdir(%s): %s\n", topdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink (fs, topdir, linkdir);
+ if (ret) {
+ fprintf (stderr, "symlink(%s, %s): %s\n", topdir, linkdir, strerror (errno));
+ return -1;
+ }
+
+ ret = glfs_chdir (fs, linkdir);
+ if (ret) {
+ fprintf (stderr, "chdir(%s): %s\n", linkdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath (fs, subdir, pathbuf);
+ if (respath) {
+ fprintf (stderr, "realpath(%s) worked unexpectedly: %s\n", subdir, respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir (fs, subdir, 0755);
+ if (ret) {
+ fprintf (stderr, "mkdir(%s): %s\n", subdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_realpath (fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf (stderr, "realpath(%s): %s\n", subdir, strerror (errno));
+ } else {
+ fprintf (stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir (fs, subdir);
+ if (ret) {
+ fprintf (stderr, "chdir(%s): %s\n", subdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath (fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf (stderr, "realpath(/linkdir/subdir): %s\n", strerror (errno));
+ } else {
+ fprintf (stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat (struct stat *sb)
+{
+ printf ("Dumping stat information:\n");
+ printf ("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK: printf ("block device\n"); break;
+ case S_IFCHR: printf ("character device\n"); break;
+ case S_IFDIR: printf ("directory\n"); break;
+ case S_IFIFO: printf ("FIFO/pipe\n"); break;
+ case S_IFLNK: printf ("symlink\n"); break;
+ case S_IFREG: printf ("regular file\n"); break;
+ case S_IFSOCK: printf ("socket\n"); break;
+ default: printf ("unknown?\n"); break;
+ }
+
+ printf ("I-node number: %ld\n", (long) sb->st_ino);
+
+ printf ("Mode: %lo (octal)\n",
+ (unsigned long) sb->st_mode);
+
+ printf ("Link count: %ld\n", (long) sb->st_nlink);
+ printf ("Ownership: UID=%ld GID=%ld\n",
+ (long) sb->st_uid, (long) sb->st_gid);
+
+ printf ("Preferred I/O block size: %ld bytes\n",
+ (long) sb->st_blksize);
+ printf ("File size: %lld bytes\n",
+ (long long) sb->st_size);
+ printf ("Blocks allocated: %lld\n",
+ (long long) sb->st_blocks);
+
+ printf ("Last status change: %s", ctime(&sb->st_ctime));
+ printf ("Last file access: %s", ctime(&sb->st_atime));
+ printf ("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle (unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++)
+ {
+ printf (":%02x:", glid[i]);
+ }
+ printf ("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat (struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle (unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink (void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf ("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+ if (dir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat (fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir (fs, dir, my_subdir, 0644, &sb);
+ if (subdir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat (fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink (fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink (fs, subdir, my_file);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink (fs, dir, my_subdir);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink (fs, dir, my_file);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent regular file */
+ ret = glfs_h_unlink (fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking non-existant %s: invalid errno ,%d, %s\n",
+ my_file, ret, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent directory */
+ ret = glfs_h_unlink (fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking non-existant %s: invalid errno ,%d, %s\n",
+ my_subdir, ret, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink (fs, parent, my_dir);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf ("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close (dir);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (subdir)
+ glfs_h_close (subdir);
+ if (subleaf)
+ glfs_h_close (subleaf);
+ if (parent)
+ glfs_h_close (parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs (void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+ if (dir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ ret = glfs_h_getattrs (fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime (CLOCK_REALTIME, &timestamp);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat (&retsb);
+
+ ret = glfs_h_setattrs (fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_setattrs: error %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof (struct stat));
+ ret = glfs_h_stat (fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_stat: error %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&retsb);
+
+ printf ("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (dir)
+ glfs_h_close (dir);
+
+ return;
+}
+
+void
+test_h_truncate (void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ fd = glfs_h_open (fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+ my_file, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy (buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write (fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate (fs, leaf, offset);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs (fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+ my_file, leaf, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate (fs, leaf, offset);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs (fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+ my_file, leaf, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate (fs, leaf, offset);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs (fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+ my_file, leaf, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf ("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close (fd);
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+
+ return;
+}
+
+void
+test_h_links (void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirsrc = glfs_h_mkdir (fs, parent, linksrc_dir, 0644, &sb);
+ if (dirsrc == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirtgt = glfs_h_mkdir (fs, parent, linktgt_dir, 0644, &sb);
+ if (dirtgt == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dleaf = glfs_h_creat (fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink (fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf (stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ buf = calloc (1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf (stderr, "Error allocating memory\n");
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink (fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf (stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp (buf, my_symlnk, strlen (my_symlnk)))) {
+ fprintf (stderr, "glfs_h_readlink: error mismatch in link name: actual %s: retrieved %s\n",
+ my_symlnk, buf);
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link (fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link (fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf ("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (dirsrc)
+ glfs_h_close (dirsrc);
+ if (dirtgt)
+ glfs_h_close (dirtgt);
+ if (dleaf)
+ glfs_h_close (dleaf);
+ if (ln1)
+ glfs_h_close (ln1);
+ if (buf)
+ free (buf);
+
+ return;
+}
+
+void
+test_h_rename (void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirsrc = glfs_h_mkdir (fs, parent, src_dir, 0644, &sb);
+ if (dirsrc == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirtgt = glfs_h_mkdir (fs, parent, tgt_dir, 0644, &sb);
+ if (dirtgt == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dleaf = glfs_h_creat (fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename (fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename (fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename (fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename (fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "dir1", "dir3", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename (fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "dir2", "dir3", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename (fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf (stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename (fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf (stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf ("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (dirsrc)
+ glfs_h_close (dirsrc);
+ if (dirtgt)
+ glfs_h_close (dirtgt);
+ if (dleaf)
+ glfs_h_close (dleaf);
+
+ return;
+}
+
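+/*
+ * Accumulate the interval (ts_ed - ts_st) into *ts, carrying nanosecond
+ * underflow/overflow into the seconds field.
+ */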
+void
+assimilatetime (struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
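+/*
+ * Create MAX_FILES_CREATE empty files twice, once through the handle-based
+ * API and once through the path-based API, and print the elapsed time for
+ * each run.
+ */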
+void
+test_h_performance (void)
+{
+ char *my_dir = "perftest",
+ *full_dir_path="/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+ if (dir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* create performance */
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf (my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat (fs, dir, my_file_name, &sb);
+ if (leaf != NULL) {
+ fprintf (stderr, "glfs_h_lookup: exists %s\n",
+ my_file_name);
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat (fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file_name, dir, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close (leaf); leaf = NULL;
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&o_ts, o_ts_st, o_ts_ed);
+
+ printf ("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf ("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ o_ts.tv_sec, o_ts.tv_nsec);
+ printf ("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf (my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir (fs, my_file_name, 0644);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_mkdir: error creating %s: from (%p),%s\n",
+ my_file_name, parent, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf (my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat (fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf (stderr, "glfs_stat: exists %s\n",
+ my_file_name);
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat (fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file_name, dir, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&c_ts, c_ts_st, c_ts_ed);
+ glfs_close (fd);
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&o_ts, o_ts_st, o_ts_ed);
+
+ printf ("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf ("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ o_ts.tv_sec, o_ts.tv_nsec);
+ printf ("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops (int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {0, };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt",
+ *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1",
+ *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1",
+ *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir (fs, full_parent_name, 0644);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf (stderr, "%s: (%p) %s\n", full_parent_name, fd,
+ strerror (errno));
+ printf ("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ else if (ret != 0) {
+ printf ("Found test directory %s to be existing\n",
+ full_parent_name);
+ printf ("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat (fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf (stderr, "%s: (%p) %s\n", full_leaf_name, fd,
+ strerror (errno));
+ printf ("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close (fd);
+
+ printf ("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf ("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat (fs, NULL, "/", &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ "/", NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat (fs, root, parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat (fs, parent, leaf_name, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* reset */
+ glfs_h_close (root); root = NULL;
+ glfs_h_close (leaf); leaf = NULL;
+ glfs_h_close (parent); parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat (fs, NULL, "/", &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ "/", NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_lookupat (fs, NULL, full_leaf_name, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* reset */
+ glfs_h_close (leaf); leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat (fs, root, relative_leaf_name, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror (errno));
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* reset */
+ glfs_h_close (root); root = NULL;
+ glfs_h_close (parent); parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf ("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf ("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open (fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror (errno));
+ printf ("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy (writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write (fd, writebuf, 32, 0);
+
+ glfs_lseek (fd, 0, SEEK_SET);
+
+ ret = glfs_read (fd, readbuf, 32, 0);
+ if (memcmp (readbuf, writebuf, 32)) {
+ printf ("Failed to read what I wrote: %s %s\n", readbuf,
+ writebuf);
+ glfs_close (fd);
+ printf ("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close (leaf); leaf = NULL;
+ glfs_close (fd);
+
+ printf ("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf ("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_creat (fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644,
+ &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf (stderr, "glfs_h_creat: existing file, leaf = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close (leaf); leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat (fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf (stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ tmp, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close (tmp); tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf ("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf ("glfs_h_extract_handle and glfs_h_create_from_handle tests: In Progress\n");
+ /* TODO: Change the lookup below to creat to reproduce a GFID recovery
+ * failure that needs to be fixed */
+ leaf = glfs_h_lookupat (fs, parent, leaf_name1, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror (errno));
+ printf ("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ ret = glfs_h_extract_handle (leaf, leaf_handle,
+ GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf (stderr, "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror (errno));
+ printf ("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle (leaf_handle);
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_create_from_handle (fs, leaf_handle, GFAPI_HANDLE_LENGTH,
+ &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror (errno));
+ printf ("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ fd = glfs_h_open (fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror (errno));
+ printf ("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy (writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write (fd, writebuf, 32, 0);
+
+ glfs_lseek (fd, 0, SEEK_SET);
+
+ ret = glfs_read (fd, readbuf, 32, 0);
+ if (memcmp (readbuf, writebuf, 32)) {
+ printf ("Failed to read what I wrote: %s %s\n", writebuf,
+ writebuf);
+ printf ("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close (fd);
+ goto out;
+ }
+
+ glfs_close (fd);
+ glfs_h_close (leaf); leaf = NULL;
+ glfs_h_close (parent); parent = NULL;
+
+ printf ("glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf ("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir (fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf (stderr, "glfs_rmdir: Failed for %s: %s\n",
+ full_newparent_name, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_mkdir (fs, parent, newparent_name, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_mkdir (fs, parent, newparent_name, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf (stderr, "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close (leaf); leaf = NULL;
+ }
+ }
+
+ glfs_h_close (parent); parent = NULL;
+
+ printf ("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf ("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink (fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf (stderr, "glfs_unlink: Failed for %s: %s\n",
+ full_newnod_name, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_mknod (fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* TODO: creat op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno = %s\n",
+ tmp, strerror (errno));
+ printf ("glfs_h_creat/mknod tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_mknod (fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf (stderr, "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close (leaf); leaf = NULL;
+ }
+ }
+
+ glfs_h_close (parent); parent = NULL;
+
+ printf ("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink ();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs ();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links ();
+
+ /* rename tests */
+ test_h_rename ();
+
+ /* performance tests */
+ test_h_performance ();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+
+ return ret;
+}
+
+int
+main (int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {0, };
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+
+ if (argc != 3) {
+ printf ("Expect following args\n\t%s <volname> <hostname>\n", argv[0]);
+ return -1;
+ }
+
+ fs = glfs_new (argv[1]);
+ if (!fs) {
+ fprintf (stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+// ret = glfs_set_volfile (fs, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server (fs, "tcp", argv[2], 24007);
+
+// ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+
+ ret = glfs_set_logging (fs, "/dev/stderr", 7);
+
+ ret = glfs_init (fs);
+
+ fprintf (stderr, "glfs_init: returned %d\n", ret);
+
+ sleep (2);
+
+ fs2 = glfs_new (argv[1]);
+ if (!fs2) {
+ fprintf (stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+
+// ret = glfs_set_volfile (fs2, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server (fs2, "tcp", argv[2], 24007);
+
+ ret = glfs_set_logging (fs2, "/dev/stderr", 7);
+
+ ret = glfs_init (fs2);
+
+ fprintf (stderr, "glfs_init: returned %d\n", ret);
+
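+ /* Basic cross-client check: create and write the file through fs, then
+ * open and read it back through the second connection fs2. */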
+ ret = glfs_lstat (fs, filename, &sb);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+ fd = glfs_creat (fs, filename, O_RDWR, 0644);
+ fprintf (stderr, "%s: (%p) %s\n", filename, fd, strerror (errno));
+
+ fd2 = glfs_open (fs2, filename, O_RDWR);
+ fprintf (stderr, "%s: (%p) %s\n", filename, fd, strerror (errno));
+
+ sprintf (writebuf, "hi there\n");
+ ret = glfs_write (fd, writebuf, 32, 0);
+
+ glfs_lseek (fd2, 0, SEEK_SET);
+
+ ret = glfs_read (fd2, readbuf, 32, 0);
+
+ printf ("read %d, %s", ret, readbuf);
+
+ glfs_close (fd);
+ glfs_close (fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod (fs, filename, S_IFIFO, 0);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+ ret = glfs_lstat (fs, filename, &sb);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+
+ ret = glfs_rename (fs, filename, "/filename4");
+ fprintf (stderr, "rename(%s): (%d) %s\n", filename, ret,
+ strerror (errno));
+
+ ret = glfs_unlink (fs, "/filename4");
+ fprintf (stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror (errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir (fs, filename, 0);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+ ret = glfs_lstat (fs, filename, &sb);
+ fprintf (stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror (errno));
+
+ ret = glfs_rmdir (fs, filename);
+ fprintf (stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror (errno));
+
+ test_dirops (fs);
+
+ test_xattr (fs);
+
+ test_chdir (fs);
+
+ test_handleops (argc, argv);
+ // done
+
+ glfs_fini (fs);
+ glfs_fini (fs2);
+
+ return ret;
+}
diff --git a/api/examples/setup.py.in b/api/examples/setup.py.in
new file mode 100644
index 000000000..44b738094
--- /dev/null
+++ b/api/examples/setup.py.in
@@ -0,0 +1,29 @@
+from distutils.core import setup
+
+# generate a __init__.py for the package namespace
+fo = open('__init__.py', 'w')
+fo.write('__version__ = "@PACKAGE_VERSION@"\n')
+fo.close()
+
+DESC = """GlusterFS is a clustered file-system capable of scaling to
+several petabytes. It aggregates various storage bricks over Infiniband
+RDMA or TCP/IP interconnect into one large parallel network file system.
+GlusterFS is one of the most sophisticated file systems in terms of
+features and extensibility. It borrows a powerful concept called
+Translators from the GNU Hurd kernel. Much of the code in GlusterFS is in
+user space and easily manageable.
+
+This package contains the Python interface to the libgfapi library."""
+
+setup(
+ name='glusterfs-api',
+ version='@PACKAGE_VERSION@',
+ description='Python client library for the GlusterFS libgfapi',
+ long_description=DESC,
+ author='Gluster Community',
+ author_email='gluster-devel@nongnu.org',
+ license='LGPLv3',
+ url='http://gluster.org/',
+ package_dir={'gluster':''},
+ packages=['gluster']
+)
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
new file mode 100644
index 000000000..7c5df3e20
--- /dev/null
+++ b/api/src/Makefile.am
@@ -0,0 +1,36 @@
+lib_LTLIBRARIES = libgfapi.la
+noinst_HEADERS = glfs-mem-types.h glfs-internal.h
+libgfapi_HEADERS = glfs.h glfs-handles.h
+libgfapidir = $(includedir)/glusterfs/api
+
+libgfapi_la_SOURCES = glfs.c glfs-mgmt.c glfs-fops.c glfs-resolve.c \
+ glfs-handleops.c
+libgfapi_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(GF_LDADD)
+
+libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src
+
+
+xlator_LTLIBRARIES = api.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
+# workaround for broken parallel install support in automake with LTLIBRARIES
+# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=7328
+install_xlatorLTLIBRARIES = install-xlatorLTLIBRARIES
+$(install_xlatorLTLIBRARIES): install-libLTLIBRARIES
+
+api_la_SOURCES = glfs-master.c
+api_la_DEPENDENCIES = libgfapi.la
+api_la_LDFLAGS = -module -avoid-version
+api_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/api/src/libgfapi.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
new file mode 100644
index 000000000..10bb7d38b
--- /dev/null
+++ b/api/src/glfs-fops.c
@@ -0,0 +1,3252 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "syncop.h"
+#include "glfs.h"
+#include <limits.h>
+
+#ifdef NAME_MAX
+#define GF_NAME_MAX NAME_MAX
+#else
+#define GF_NAME_MAX 255
+#endif
+
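+/* Size of a buffer large enough for a struct dirent plus the longest
+   possible entry name and its terminating NUL. */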
+#define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+
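+/*
+ * Link the inode in @loc into the inode table against its parent and name,
+ * taking a lookup reference on it; returns 0 on success, -1 with errno set
+ * on failure.
+ */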
+int
+glfs_loc_link (loc_t *loc, struct iatt *iatt)
+{
+ int ret = -1;
+ inode_t *linked_inode = NULL;
+
+ if (!loc->inode) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ linked_inode = inode_link (loc->inode, loc->parent, loc->name, iatt);
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ ret = 0;
+ } else {
+ ret = -1;
+ errno = ENOMEM;
+ }
+
+ return ret;
+}
+
+
+void
+glfs_iatt_to_stat (struct glfs *fs, struct iatt *iatt, struct stat *stat)
+{
+ iatt_to_stat (iatt, stat);
+ stat->st_dev = fs->dev_id;
+}
+
+
+int
+glfs_loc_unlink (loc_t *loc)
+{
+ inode_unlink (loc->inode, loc->parent, loc->name);
+
+ return 0;
+}
+
+
+struct glfs_fd *
+glfs_open (struct glfs *fs, const char *path, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. It is safe to touch glfd->fd as we
+ have not called glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+int
+glfs_close (struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct glfs *fs = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_flush (subvol, fd);
+out:
+ fs = glfd->fs;
+ glfs_fd_destroy (glfd);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_lstat (struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_stat (struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fstat (struct glfs_fd *glfd, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct iatt iatt = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fstat (subvol, fd, &iatt);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (glfd->fs, &iatt, stat);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+struct glfs_fd *
+glfs_creat (struct glfs *fs, const char *path, int flags, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ /* This must be glfs_resolve() and NOT glfs_lresolve().
+ That is because open("name", O_CREAT) where "name"
+ is a dangling symlink must create the dangling
+ destination.
+ */
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ if (loc.inode) {
+ if (flags & O_EXCL) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (ret == -1 && errno == ENOENT) {
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (glfd->fd) {
+ /* Retry. It is safe to touch glfd->fd as we
+ have not called glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (ret == 0) {
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+ } else {
+ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd,
+ xattr_req, &iatt);
+ }
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+off_t
+glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence)
+{
+ struct stat sb = {0, };
+ int ret = -1;
+
+ __glfs_entry_fd (glfd);
+
+ switch (whence) {
+ case SEEK_SET:
+ glfd->offset = offset;
+ break;
+ case SEEK_CUR:
+ glfd->offset += offset;
+ break;
+ case SEEK_END:
+ ret = glfs_fstat (glfd, &sb);
+ if (ret) {
+ /* lseek() has no way to report this error; leave the offset unchanged */
+ break;
+ }
+ glfd->offset = sb.st_size + offset;
+ break;
+ }
+
+ return glfd->offset;
+}
+
+
+//////////////
+
+ssize_t
+glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ xlator_t *subvol = NULL;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+ struct iovec *iov = NULL;
+ int cnt = 0;
+ struct iobref *iobref = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length (iovec, iovcnt);
+
+ ret = syncop_readv (subvol, fd, size, offset, 0, &iov, &cnt, &iobref);
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy (iovec, iovcnt, iov, cnt); /* FIXME!!! */
+
+ glfd->offset = (offset + size);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE (iov);
+ if (iobref)
+ iobref_unref (iobref);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_read (struct glfs_fd *glfd, void *buf, size_t count, int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv (glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_pread (struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv (glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_readv (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_preadv (glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+
+struct glfs_io {
+ struct glfs_fd *glfd;
+ int op;
+ off_t offset;
+ struct iovec *iov;
+ int count;
+ int flags;
+ glfs_io_cbk fn;
+ void *data;
+};
+
+
+static int
+glfs_io_async_cbk (int ret, call_frame_t *frame, void *data)
+{
+ struct glfs_io *gio = data;
+
+ gio->fn (gio->glfd, ret, gio->data);
+
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+
+ return 0;
+}
+
+
+static int
+glfs_io_async_task (void *data)
+{
+ struct glfs_io *gio = data;
+ ssize_t ret = 0;
+
+ switch (gio->op) {
+ case GF_FOP_WRITE:
+ ret = glfs_pwritev (gio->glfd, gio->iov, gio->count,
+ gio->offset, gio->flags);
+ break;
+ case GF_FOP_FTRUNCATE:
+ ret = glfs_ftruncate (gio->glfd, gio->offset);
+ break;
+ case GF_FOP_FSYNC:
+ if (gio->flags)
+ ret = glfs_fdatasync (gio->glfd);
+ else
+ ret = glfs_fsync (gio->glfd);
+ break;
+ case GF_FOP_DISCARD:
+ ret = glfs_discard (gio->glfd, gio->offset, gio->count);
+ break;
+ case GF_FOP_ZEROFILL:
+ ret = glfs_zerofill(gio->glfd, gio->offset, gio->count);
+ break;
+ }
+
+ return (int) ret;
+}
+
+
+int
+glfs_preadv_async_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iovec *iovec,
+ int count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct glfs_io *gio = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ struct glfs_fd *glfd = NULL;
+
+
+ gio = frame->local;
+ frame->local = NULL;
+ subvol = cookie;
+ glfd = gio->glfd;
+ fs = glfd->fs;
+
+ if (op_ret <= 0)
+ goto out;
+
+ op_ret = iov_copy (gio->iov, gio->count, iovec, count);
+
+ glfd->offset = gio->offset + op_ret;
+out:
+ errno = op_errno;
+ gio->fn (gio->glfd, op_ret, gio->data);
+
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ STACK_DESTROY (frame->root);
+ glfs_subvol_done (fs, subvol);
+
+ return 0;
+}
+
+
+int
+glfs_preadv_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ glfs_t *fs = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fs = glfd->fs;
+
+ frame = syncop_create_frame (THIS);
+ if (!frame) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->iov = iov_dup (iovec, count);
+ if (!gio->iov) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_READ;
+ gio->glfd = glfd;
+ gio->count = count;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ STACK_WIND_COOKIE (frame, glfs_preadv_async_cbk, subvol, subvol,
+ subvol->fops->readv, fd, iov_length (iovec, count),
+ offset, flags, NULL);
+
+out:
+ if (ret) {
+ /* bail-out path: gio, its iov and frame may not all have been
+ allocated yet, so guard each cleanup */
+ if (gio) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+ if (frame)
+ STACK_DESTROY (frame->root);
+ glfs_subvol_done (glfd->fs, subvol);
+ }
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+int
+glfs_read_async (struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async (glfd, &iov, 1, glfd->offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_pread_async (struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async (glfd, &iov, 1, offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_readv_async (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_preadv_async (glfd, iov, count, glfd->offset, flags,
+ fn, data);
+ return ret;
+}
+
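The asynchronous read calls above complete through the glfs_io_cbk callback; as the invocation gio->fn (gio->glfd, ret, gio->data) shows, the callback receives the glfs_fd, the byte count (negative on error), and the caller-supplied cookie. A minimal caller sketch, assuming that callback signature from glfs.h; the helper names and the completion flag are purely illustrative:

/* Sketch, not part of the patch: submit one asynchronous pread and note
   completion through a flag. Assumes the glfs_io_cbk typedef in glfs.h
   matches the (glfs_fd, ssize_t, void *) invocation seen above. */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include "glfs.h"

static void
read_done (struct glfs_fd *fd, ssize_t ret, void *cookie)
{
        int *done = cookie;

        if (ret < 0)
                fprintf (stderr, "async read failed: %s\n", strerror (errno));
        *done = 1;        /* a real caller would signal a condition variable */
}

static void
issue_read (struct glfs_fd *glfd)
{
        static char buf[4096];
        static int  done = 0;

        if (glfs_pread_async (glfd, buf, sizeof (buf), 0, 0,
                              read_done, &done) < 0)
                fprintf (stderr, "submit failed: %s\n", strerror (errno));

        /* ... wait until read_done() sets 'done' before reusing buf ... */
}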
+///// writev /////
+
+ssize_t
+glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ size_t size = -1;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length (iovec, iovcnt);
+
+ iobuf = iobuf_get2 (subvol->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ iobuf_unref (iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add (iobref, iobuf);
+ if (ret) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload (iobuf_ptr (iobuf), iovec, iovcnt); /* FIXME!!! */
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = size;
+
+ ret = syncop_writev (subvol, fd, &iov, 1, offset, iobref, flags);
+
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+
+ if (ret <= 0)
+ goto out;
+
+ glfd->offset = (offset + size);
+
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_write (struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev (glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+
+
+ssize_t
+glfs_writev (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_pwritev (glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_pwrite (struct glfs_fd *glfd, const void *buf, size_t count, off_t offset,
+ int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev (glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+
+int
+glfs_pwritev_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->iov = iov_dup (iovec, count);
+ if (!gio->iov) {
+ GF_FREE (gio);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_WRITE;
+ gio->glfd = glfd;
+ gio->count = count;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+
+int
+glfs_write_async (struct glfs_fd *glfd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async (glfd, &iov, 1, glfd->offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_pwrite_async (struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async (glfd, &iov, 1, offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_writev_async (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_pwritev_async (glfd, iov, count, glfd->offset, flags,
+ fn, data);
+ return ret;
+}
+
+
+int
+glfs_fsync (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsync (subvol, fd, 0);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+static int
+glfs_fsync_async_common (struct glfs_fd *glfd, glfs_io_cbk fn, void *data,
+ int dataonly)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_FSYNC;
+ gio->glfd = glfd;
+ gio->flags = dataonly;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+
+}
+
+
+int
+glfs_fsync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ return glfs_fsync_async_common (glfd, fn, data, 0);
+}
+
+
+int
+glfs_fdatasync (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsync (subvol, fd, 1);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fdatasync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ return glfs_fsync_async_common (glfd, fn, data, 1);
+}
+
+
+int
+glfs_ftruncate (struct glfs_fd *glfd, off_t offset)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_ftruncate (subvol, fd, offset);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_ftruncate_async (struct glfs_fd *glfd, off_t offset,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_FTRUNCATE;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+
+int
+glfs_access (struct glfs *fs, const char *path, int mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_access (subvol, &loc, mode);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_symlink (struct glfs *fs, const char *data, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_readlink (struct glfs *fs, const char *path, char *buf, size_t bufsiz)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+ char *linkval = NULL;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFLNK) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz);
+ if (ret > 0) {
+ memcpy (buf, linkval, ret);
+ GF_FREE (linkval);
+ }
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_mknod (struct glfs *fs, const char *path, mode_t mode, dev_t dev)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_mkdir (struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_unlink (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ ret = syncop_unlink (subvol, &loc);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_rmdir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ ret = syncop_rmdir (subvol, &loc);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_rename (struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve (fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &newloc, retrynew);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) !=
+ (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename (subvol, &oldloc, &newloc);
+
+ if (ret == -1 && errno == ESTALE) {
+ if (reval < DEFAULT_REVAL_COUNT) {
+ reval++;
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+ goto retry;
+ }
+ }
+
+ if (ret == 0)
+ inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode,
+ &oldiatt);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_link (struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve (fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &newloc, retrynew);
+
+ if (ret == 0) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (oldiatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* Fill in the inode of the hard link with that of the
+ original file
+ */
+ if (newloc.inode) {
+ inode_unref (newloc.inode);
+ newloc.inode = NULL;
+ }
+ newloc.inode = inode_ref (oldloc.inode);
+
+ ret = syncop_link (subvol, &oldloc, &newloc);
+
+ if (ret == -1 && errno == ESTALE) {
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+ if (reval--)
+ goto retry;
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_link (&newloc, &oldiatt);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+struct glfs_fd *
+glfs_opendir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD (&glfd->entries);
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. It is safe to touch glfd->fd as we
+ have not called glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_opendir (subvol, &loc, glfd->fd);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+int
+glfs_closedir (struct glfs_fd *glfd)
+{
+ __glfs_entry_fd (glfd);
+
+ gf_dirent_free (list_entry (&glfd->entries, gf_dirent_t, list));
+
+ glfs_fd_destroy (glfd);
+
+ return 0;
+}
+
+
+long
+glfs_telldir (struct glfs_fd *fd)
+{
+ return fd->offset;
+}
+
+
+void
+glfs_seekdir (struct glfs_fd *fd, long offset)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (fd->offset == offset)
+ return;
+
+ fd->offset = offset;
+ fd->next = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &fd->entries, list) {
+ if (entry->d_off != offset)
+ continue;
+
+ if (&tmp->list != &fd->entries) {
+ /* found! */
+ fd->next = tmp;
+ return;
+ }
+ }
+ /* could not find entry at requested offset in the cache.
+ next readdir_r() will result in glfd_entry_refresh()
+ */
+}
+
+int
+glfs_discard_async (struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_DISCARD;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+int
+glfs_zerofill_async (struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_ZEROFILL;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+
+void
+gf_dirent_to_dirent (gf_dirent_t *gf_dirent, struct dirent *dirent)
+{
+ dirent->d_ino = gf_dirent->d_ino;
+
+#ifdef _DIRENT_HAVE_D_OFF
+ dirent->d_off = gf_dirent->d_off;
+#endif
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ dirent->d_type = gf_dirent->d_type;
+#endif
+
+#ifdef _DIRENT_HAVE_D_NAMLEN
+ dirent->d_namlen = strlen (gf_dirent->d_name);
+#endif
+
+ strncpy (dirent->d_name, gf_dirent->d_name, GF_NAME_MAX + 1);
+}
+
+
+int
+glfd_entry_refresh (struct glfs_fd *glfd, int plus)
+{
+ xlator_t *subvol = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t old;
+ int ret = -1;
+ fd_t *fd = NULL;
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (fd->inode->ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = EBADF;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ INIT_LIST_HEAD (&old.list);
+
+ if (plus)
+ ret = syncop_readdirp (subvol, fd, 131072, glfd->offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (subvol, fd, 131072, glfd->offset,
+ &entries);
+ if (ret >= 0) {
+ if (plus)
+ gf_link_inodes_from_dirent (THIS, fd->inode, &entries);
+
+ list_splice_init (&glfd->entries, &old.list);
+ list_splice_init (&entries.list, &glfd->entries);
+
+ /* spurious errno is dangerous for glfd_entry_next() */
+ errno = 0;
+ }
+
+ if (ret > 0)
+ glfd->next = list_entry (glfd->entries.next, gf_dirent_t, list);
+
+ gf_dirent_free (&old);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+gf_dirent_t *
+glfd_entry_next (struct glfs_fd *glfd, int plus)
+{
+ gf_dirent_t *entry = NULL;
+ int ret = -1;
+
+ if (!glfd->offset || !glfd->next) {
+ ret = glfd_entry_refresh (glfd, plus);
+ if (ret < 0)
+ return NULL;
+ }
+
+ entry = glfd->next;
+ if (!entry)
+ return NULL;
+
+ if (&entry->next->list == &glfd->entries)
+ glfd->next = NULL;
+ else
+ glfd->next = entry->next;
+
+ glfd->offset = entry->d_off;
+
+ return entry;
+}
+
+
+static struct dirent *
+glfs_readdirbuf_get (struct glfs_fd *glfd)
+{
+ struct dirent *buf = NULL;
+
+ LOCK (&glfd->fd->lock);
+ {
+ buf = glfd->readdirbuf;
+ if (buf) {
+ memset (buf, 0, READDIRBUF_SIZE);
+ goto unlock;
+ }
+
+ buf = GF_CALLOC (1, READDIRBUF_SIZE, glfs_mt_readdirbuf_t);
+ if (!buf) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ glfd->readdirbuf = buf;
+ }
+unlock:
+ UNLOCK (&glfd->fd->lock);
+
+ return buf;
+}
+
+
+int
+glfs_readdirplus_r (struct glfs_fd *glfd, struct stat *stat, struct dirent *ext,
+ struct dirent **res)
+{
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct dirent *buf = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ errno = 0;
+
+ if (ext)
+ buf = ext;
+ else
+ buf = glfs_readdirbuf_get (glfd);
+
+ if (!buf) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ entry = glfd_entry_next (glfd, !!stat);
+ if (errno)
+ ret = -1;
+
+ if (res) {
+ if (entry)
+ *res = buf;
+ else
+ *res = NULL;
+ }
+
+ if (entry) {
+ gf_dirent_to_dirent (entry, buf);
+ if (stat)
+ glfs_iatt_to_stat (glfd->fs, &entry->d_stat, stat);
+ }
+
+ return ret;
+}
+
+
+int
+glfs_readdir_r (struct glfs_fd *glfd, struct dirent *buf, struct dirent **res)
+{
+ return glfs_readdirplus_r (glfd, NULL, buf, res);
+}
+
+
+struct dirent *
+glfs_readdirplus (struct glfs_fd *glfd, struct stat *stat)
+{
+ struct dirent *res = NULL;
+ int ret = -1;
+
+ ret = glfs_readdirplus_r (glfd, stat, NULL, &res);
+ if (ret)
+ return NULL;
+
+ return res;
+}
+
+
+
+struct dirent *
+glfs_readdir (struct glfs_fd *glfd)
+{
+ return glfs_readdirplus (glfd, NULL);
+}
+
+
+int
+glfs_statvfs (struct glfs *fs, const char *path, struct statvfs *buf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_statfs (subvol, &loc, buf);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_setattr (struct glfs *fs, const char *path, struct iatt *iatt,
+ int valid, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt riatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &riatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &riatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_setattr (subvol, &loc, iatt, valid, 0, 0);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsetattr (subvol, fd, iatt, valid, 0, 0);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_chmod (struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ struct iatt iatt = {0, };
+ int valid = 0;
+
+ iatt.ia_prot = ia_prot_from_st_mode (mode);
+ valid = GF_SET_ATTR_MODE;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 1);
+
+ return ret;
+}
+
+
+int
+glfs_fchmod (struct glfs_fd *glfd, mode_t mode)
+{
+ int ret = -1;
+ struct iatt iatt = {0, };
+ int valid = 0;
+
+ iatt.ia_prot = ia_prot_from_st_mode (mode);
+ valid = GF_SET_ATTR_MODE;
+
+ ret = glfs_fsetattr (glfd, &iatt, valid);
+
+ return ret;
+}
+
+
+int
+glfs_chown (struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_uid = uid;
+ iatt.ia_gid = gid;
+ valid = GF_SET_ATTR_UID|GF_SET_ATTR_GID;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 1);
+
+ return ret;
+}
+
+
+int
+glfs_lchown (struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_uid = uid;
+ iatt.ia_gid = gid;
+ valid = GF_SET_ATTR_UID|GF_SET_ATTR_GID;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 0);
+
+ return ret;
+}
+
+
+int
+glfs_fchown (struct glfs_fd *glfd, uid_t uid, gid_t gid)
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_uid = uid;
+ iatt.ia_gid = gid;
+ valid = GF_SET_ATTR_UID|GF_SET_ATTR_GID;
+
+ ret = glfs_fsetattr (glfd, &iatt, valid);
+
+ return ret;
+}
+
+
+int
+glfs_utimens (struct glfs *fs, const char *path, struct timespec times[2])
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_atime = times[0].tv_sec;
+ iatt.ia_atime_nsec = times[0].tv_nsec;
+ iatt.ia_mtime = times[1].tv_sec;
+ iatt.ia_mtime_nsec = times[1].tv_nsec;
+
+ valid = GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 1);
+
+ return ret;
+}
+
+
+int
+glfs_lutimens (struct glfs *fs, const char *path, struct timespec times[2])
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_atime = times[0].tv_sec;
+ iatt.ia_atime_nsec = times[0].tv_nsec;
+ iatt.ia_mtime = times[1].tv_sec;
+ iatt.ia_mtime_nsec = times[1].tv_nsec;
+
+ valid = GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 0);
+
+ return ret;
+}
+
+
+int
+glfs_futimens (struct glfs_fd *glfd, struct timespec times[2])
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_atime = times[0].tv_sec;
+ iatt.ia_atime_nsec = times[0].tv_nsec;
+ iatt.ia_mtime = times[1].tv_sec;
+ iatt.ia_mtime_nsec = times[1].tv_nsec;
+
+ valid = GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME;
+
+ ret = glfs_fsetattr (glfd, &iatt, valid);
+
+ return ret;
+}
+
+
+int
+glfs_getxattr_process (void *value, size_t size, dict_t *xattr,
+ const char *name)
+{
+ data_t *data = NULL;
+ int ret = -1;
+
+ data = dict_get (xattr, (char *)name);
+ if (!data) {
+ errno = ENODATA;
+ ret = -1;
+ goto out;
+ }
+
+ ret = data->len;
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy (value, data->data, ret);
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+
+ssize_t
+glfs_getxattr_common (struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, name);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process (value, size, xattr, name);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_getxattr (struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common (fs, path, name, value, size, 1);
+}
+
+
+ssize_t
+glfs_lgetxattr (struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common (fs, path, name, value, size, 0);
+}
+
+
+ssize_t
+glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value,
+ size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr (subvol, fd, &xattr, name);
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process (value, size, xattr, name);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_listxattr_process (void *value, size_t size, dict_t *xattr)
+{
+ int ret = -1;
+
+ ret = dict_keys_join (NULL, 0, xattr, NULL);
+
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ dict_keys_join (value, size, xattr, NULL);
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+
+ssize_t
+glfs_listxattr_common (struct glfs *fs, const char *path, void *value,
+ size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, NULL);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process (value, size, xattr);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_listxattr (struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common (fs, path, value, size, 1);
+}
+
+
+ssize_t
+glfs_llistxattr (struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common (fs, path, value, size, 0);
+}
+
+
+ssize_t
+glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr (subvol, fd, &xattr, NULL);
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process (value, size, xattr);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+dict_t *
+dict_for_key_value (const char *name, const char *value, size_t size)
+{
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ xattr = dict_new ();
+ if (!xattr)
+ return NULL;
+
+ ret = dict_set_static_bin (xattr, (char *)name, (void *)value, size);
+ if (ret) {
+ dict_destroy (xattr);
+ xattr = NULL;
+ }
+
+ return xattr;
+}
+
+
+int
+glfs_setxattr_common (struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ xattr = dict_for_key_value (name, value, size);
+ if (!xattr) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_setxattr (subvol, &loc, xattr, flags);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+out:
+ loc_wipe (&loc);
+ if (xattr)
+ dict_unref (xattr);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_setxattr (struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common (fs, path, name, value, size, flags, 1);
+}
+
+
+int
+glfs_lsetxattr (struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common (fs, path, name, value, size, flags, 0);
+}
+
+
+int
+glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value,
+ size_t size, int flags)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ xattr = dict_for_key_value (name, value, size);
+ if (!xattr) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (subvol, fd, xattr, flags);
+out:
+ if (xattr)
+ dict_unref (xattr);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_removexattr_common (struct glfs *fs, const char *path, const char *name,
+ int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_removexattr (subvol, &loc, name);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_removexattr (struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common (fs, path, name, 1);
+}
+
+
+int
+glfs_lremovexattr (struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common (fs, path, name, 0);
+}
+
+
+int
+glfs_fremovexattr (struct glfs_fd *glfd, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr (subvol, fd, name);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fallocate (struct glfs_fd *glfd, int keep_size, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fallocate (subvol, fd, keep_size, offset, len);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_discard (struct glfs_fd *glfd, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_discard (subvol, fd, offset, len);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_zerofill (struct glfs_fd *glfd, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_zerofill (subvol, fd, offset, len);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_chdir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set (fs, loc.inode);
+
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fchdir (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ inode = fd->inode;
+
+ if (!IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set (glfd->fs, inode);
+ ret = 0;
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+char *
+glfs_realpath (struct glfs *fs, const char *path, char *resolved_path)
+{
+ int ret = -1;
+ char *retpath = NULL;
+ char *allocpath = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ if (resolved_path)
+ retpath = resolved_path;
+ else
+ retpath = allocpath = malloc (PATH_MAX + 1);
+
+ if (!retpath) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (loc.path) {
+ strncpy (retpath, loc.path, PATH_MAX);
+ retpath[PATH_MAX] = 0;
+ }
+
+out:
+ loc_wipe (&loc);
+
+ if (ret == -1) {
+ if (allocpath)
+ free (allocpath);
+ retpath = NULL;
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return retpath;
+}
+
+
+char *
+glfs_getcwd (struct glfs *fs, char *buf, size_t n)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ char *path = NULL;
+
+ __glfs_entry_fs (fs);
+
+ if (!buf || n < 2) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ inode = glfs_cwd_get (fs);
+
+ if (!inode) {
+ strncpy (buf, "/", n);
+ ret = 0;
+ goto out;
+ }
+
+ ret = inode_path (inode, 0, &path);
+ if (n <= ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ strncpy (buf, path, n);
+ ret = 0;
+out:
+ GF_FREE (path);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret < 0)
+ return NULL;
+
+ return buf;
+}
+
+
+static void
+gf_flock_to_flock (struct gf_flock *gf_flock, struct flock *flock)
+{
+ flock->l_type = gf_flock->l_type;
+ flock->l_whence = gf_flock->l_whence;
+ flock->l_start = gf_flock->l_start;
+ flock->l_len = gf_flock->l_len;
+ flock->l_pid = gf_flock->l_pid;
+}
+
+
+static void
+gf_flock_from_flock (struct gf_flock *gf_flock, struct flock *flock)
+{
+ gf_flock->l_type = flock->l_type;
+ gf_flock->l_whence = flock->l_whence;
+ gf_flock->l_start = flock->l_start;
+ gf_flock->l_len = flock->l_len;
+ gf_flock->l_pid = flock->l_pid;
+}
+
+
+int
+glfs_posix_lock (struct glfs_fd *glfd, int cmd, struct flock *flock)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct gf_flock gf_flock = {0, };
+ struct gf_flock saved_flock = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ gf_flock_from_flock (&gf_flock, flock);
+ gf_flock_from_flock (&saved_flock, flock);
+ ret = syncop_lk (subvol, fd, cmd, &gf_flock);
+ gf_flock_to_flock (&gf_flock, flock);
+
+ if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW))
+ fd_lk_insert_and_merge (fd, cmd, &saved_flock);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+struct glfs_fd *
+glfs_dup (struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ glfs_fd_t *dupfd = NULL;
+ struct glfs *fs = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ fs = glfd->fs;
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ dupfd = glfs_fd_new (fs);
+ if (!dupfd) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ dupfd->fd = fd_ref (fd);
+out:
+ if (fd)
+ fd_unref (fd);
+ if (dupfd)
+ glfs_fd_bind (dupfd);
+
+ glfs_subvol_done (fs, subvol);
+
+ return dupfd;
+}
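glfs_dup() closes out the POSIX-style half of the new API: open/creat, read/write (sync and async), metadata, xattrs, locking and directory iteration. A rough end-to-end sketch of how an application might drive it, assuming the lifecycle calls glfs_new(), glfs_set_volfile_server(), glfs_init() and glfs_fini() that the rest of this patch declares in glfs.h; the volume name "testvol" and host "server1" are placeholders:

/* Sketch only: error handling trimmed, names are placeholders. */
#include <stdio.h>
#include <fcntl.h>
#include "glfs.h"

int
main (void)
{
        struct glfs    *fs = NULL;
        struct glfs_fd *fd = NULL;
        char            buf[128] = {0, };

        fs = glfs_new ("testvol");
        if (!fs)
                return 1;
        glfs_set_volfile_server (fs, "tcp", "server1", 24007);
        if (glfs_init (fs) != 0)
                return 1;

        fd = glfs_creat (fs, "/hello.txt", O_RDWR, 0644);
        if (fd) {
                glfs_write (fd, "hello gfapi\n", 12, 0);
                glfs_lseek (fd, 0, SEEK_SET);
                glfs_read (fd, buf, sizeof (buf) - 1, 0);
                printf ("%s", buf);
                glfs_close (fd);
        }

        glfs_fini (fs);
        return 0;
}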
diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
new file mode 100644
index 000000000..9c707a619
--- /dev/null
+++ b/api/src/glfs-handleops.c
@@ -0,0 +1,1278 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "syncop.h"
+#include "glfs.h"
+#include "glfs-handles.h"
+
+static void
+glfs_iatt_from_stat (struct stat *stat, int valid, struct iatt *iatt,
+ int *glvalid)
+{
+ /* validate in args */
+ if ((stat == NULL) || (iatt == NULL) || (glvalid == NULL)) {
+ errno = EINVAL;
+ return;
+ }
+
+ *glvalid = 0;
+
+ if (valid & GFAPI_SET_ATTR_MODE) {
+ iatt->ia_prot = ia_prot_from_st_mode (stat->st_mode);
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (valid & GFAPI_SET_ATTR_UID) {
+ iatt->ia_uid = stat->st_uid;
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_GID) {
+ iatt->ia_gid = stat->st_gid;
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_ATIME) {
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (valid & GFAPI_SET_ATTR_MTIME) {
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC (stat);
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+
+ return;
+}
+
+struct glfs_object *
+glfs_h_lookupat (struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {0, };
+ struct glfs_object *object = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ if (parent) {
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_at (fs, subvol, inode, path, &loc, &iatt,
+ 0 /*TODO: links? */, 0);
+
+ /* populate out args */
+ if (!ret) {
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_stat (struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_stat (subvol, &loc, &iatt);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat (fs, &iatt, stat);
+ }
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_getattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_base (fs, subvol, inode, &iatt);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat (fs, &iatt, stat);
+ }
+
+out:
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_setattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat,
+ int valid)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int glvalid = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (stat == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* map valid masks from in args */
+ glfs_iatt_from_stat (stat, valid, &iatt, &glvalid);
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_setattr (subvol, &loc, &iatt, glvalid, 0, 0);
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+struct glfs_fd *
+glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* check types to open */
+ if (IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (inode->ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create (inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+struct glfs_object *
+glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd,
+ xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ /* TODO: If the inode existed in the cache (say file already
+ exists), then the glfs_loc_link will not update the
+ loc.inode, as a result we will have a 0000 GFID that we
+ would copy out to the object, this needs to be fixed.
+ */
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ if (glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+struct glfs_object *
+glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+struct glfs_object *
+glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, dev_t dev, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, inode, path, &loc, NULL, 0, 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (!IA_ISDIR(loc.inode->ia_type)) {
+ ret = syncop_unlink (subvol, &loc);
+ if (ret != 0) {
+ goto out;
+ }
+ } else {
+ ret = syncop_rmdir (subvol, &loc);
+ if (ret != 0) {
+ goto out;
+ }
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+struct glfs_fd *
+glfs_h_opendir (struct glfs *fs, struct glfs_object *object)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (!IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD (&glfd->entries);
+
+ glfd->fd = fd_create (inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_opendir (subvol, &loc, glfd->fd);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+ssize_t
+glfs_h_extract_handle (struct glfs_object *object, unsigned char *handle,
+ int len)
+{
+ ssize_t ret = -1;
+
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (!handle || !len) {
+ ret = GFAPI_HANDLE_LENGTH;
+ goto out;
+ }
+
+ if (len < GFAPI_HANDLE_LENGTH)
+ {
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy (handle, object->gfid, GFAPI_HANDLE_LENGTH);
+
+ ret = GFAPI_HANDLE_LENGTH;
+
+out:
+ return ret;
+}
+
+struct glfs_object *
+glfs_h_create_from_handle (struct glfs *fs, unsigned char *handle, int len,
+ struct stat *stat)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ struct iatt iatt = {0, };
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy (loc.gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ newinode = inode_find (subvol->itable, loc.gfid);
+ if (newinode)
+ loc.inode = newinode;
+ else {
+ loc.inode = inode_new (subvol->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0);
+ if (ret) {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode refresh of %s failed: %s",
+ uuid_utoa (loc.gfid), strerror (errno));
+ goto out;
+ }
+
+ newinode = inode_link (loc.inode, 0, 0, &iatt);
+ if (newinode)
+ inode_lookup (newinode);
+ else {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode linking of %s failed: %s",
+ uuid_utoa (loc.gfid), strerror (errno));
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* populate stat */
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ object = GF_CALLOC (1, sizeof(struct glfs_object),
+ glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* populate the return object */
+ object->inode = newinode;
+ uuid_copy (object->gfid, object->inode->gfid);
+
+out:
+ /* TODO: Check where the inode ref is being held? */
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_close (struct glfs_object *object)
+{
+ /* Release the held reference */
+ inode_unref (object->inode);
+ GF_FREE (object);
+
+ return 0;
+}
+
+int
+glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_truncate (subvol, &loc, (off_t)offset);
+
+ /* populate out args */
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+struct glfs_object *
+glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name,
+ const char *data, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (name == NULL) ||
+ (data == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, name);
+
+ /* fop/op */
+ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ /* TODO: If the inode existed in the cache (say file already
+ * exists), then the glfs_loc_link will not update the
+ * loc.inode, as a result we will have a 0000 GFID that we
+ * would copy out to the object, this needs to be fixed.
+ */
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ char *linkval = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (buf == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz);
+
+ /* populate out args */
+ if (ret > 0)
+ memcpy (buf, linkval, ret);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (linkval)
+ GF_FREE (linkval);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_link (struct glfs *fs, struct glfs_object *linksrc,
+ struct glfs_object *parent, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ inode_t *pinode = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (linksrc == NULL) || (parent == NULL) ||
+ (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, linksrc);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, oldloc, out);
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ pinode = glfs_resolve_inode (fs, subvol, parent);
+ if (!pinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* setup newloc based on parent */
+ newloc.parent = inode_ref (pinode);
+ newloc.name = name;
+ ret = glfs_loc_touchup (&newloc);
+ if (ret != 0) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ * original file
+ */
+ newloc.inode = inode_ref (inode);
+
+ /* fop/op */
+ ret = syncop_link (subvol, &oldloc, &newloc);
+
+ if (ret == 0)
+ /* TODO: No iatt to pass as there has been no lookup */
+ ret = glfs_loc_link (&newloc, NULL);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (pinode)
+ inode_unref (pinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname,
+ struct glfs_object *newdir, const char *newname)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *oldpinode = NULL;
+ inode_t *newpinode = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (olddir == NULL) || (oldname == NULL) ||
+ (newdir == NULL) || (newname == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ oldpinode = glfs_resolve_inode (fs, subvol, olddir);
+ if (!oldpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, oldpinode, oldname, &oldloc,
+ &oldiatt, 0, 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* get/refresh the in-arg object's inode in correlation to the xlator */
+ newpinode = glfs_resolve_inode (fs, subvol, newdir);
+ if (!newpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, newpinode, newname, &newloc,
+ &newiatt, 0, 0);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) !=
+ (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename (subvol, &oldloc, &newloc);
+
+ if (ret == 0)
+ inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode,
+ &oldiatt);
+
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ if (oldpinode)
+ inode_unref (oldpinode);
+
+ if (newpinode)
+ inode_unref (newpinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
new file mode 100644
index 000000000..437f2cbc8
--- /dev/null
+++ b/api/src/glfs-handles.h
@@ -0,0 +1,143 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_HANDLES_H
+#define _GLFS_HANDLES_H
+
+#include "glfs.h"
+
+/* GLFS OBJECT BASED OPERATIONS
+ *
+ * The following APIs are introduced to provide an API framework that can work
+ * with gluster objects (files and directories), instead of absolute paths.
+ *
+ * The following API set can be related to the POSIX *at interfaces (like
+ * openat (2)). The intention of these APIs is to operate relative to a parent
+ * object, looking up or creating child objects within it, OR to act on the
+ * object thus looked up or created and retrieve information about it.
+ *
+ * The APIs also provide for generating an opaque, invariant handle to the
+ * object that can later be used to look up the object, instead of the regular
+ * glfs_h_* variants. The APIs that provide this behaviour are
+ * glfs_h_extract_handle and glfs_h_create_from_handle.
+ *
+ * The object handles can be transitioned to fd based operations as supported
+ * by glfs.h calls, using the glfs_h_open call. This provides a way to move
+ * from objects to fd's akin to moving from path to fd for required operations.
+ *
+ * NOTE: The opaque invariant handle is the GFID of the object in reality, but
+ * maintained as an opaque data value, for potential internal changes to the
+ * same without impacting the caller.
+ *
+ * NOTE: Currently, looking up an object can create multiple object handles to
+ * the same object, i.e. distinct glfs_object pointers. Hence each such looked
+ * up or received handle from other calls would need to be closed. In the
+ * future, for a given object these pointers would be the same, and an
+ * ease-of-use API to forget all instances of this object would be provided
+ * (instead of a per-lookup close). This should not change the APIs in their
+ * current form.
+ *
+ */
+
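+/* Illustrative usage (a minimal sketch; the path and variable names below are
+ * only examples, and an already initialized and mounted 'struct glfs *fs' is
+ * assumed): a caller can look up an object, extract its invariant handle, and
+ * later recreate the object purely from those handle bytes.
+ *
+ * struct stat st;
+ * unsigned char gfh[GFAPI_HANDLE_LENGTH];
+ * struct glfs_object *obj = NULL, *obj2 = NULL;
+ *
+ * obj = glfs_h_lookupat (fs, NULL, "/dir/file", &st);
+ * (void) glfs_h_extract_handle (obj, gfh, GFAPI_HANDLE_LENGTH);
+ *
+ * // later, possibly after only the handle bytes were retained:
+ * obj2 = glfs_h_create_from_handle (fs, gfh, GFAPI_HANDLE_LENGTH, &st);
+ * struct glfs_fd *glfd = glfs_h_open (fs, obj2, O_RDONLY);
+ *
+ * glfs_h_close (obj);
+ * glfs_h_close (obj2);
+ */
+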
+/* Values for the valid flags to be used with the setattr variants, to select
+ which of the attribute values passed via the related stat structure are
+ applied.
+ */
+#define GFAPI_SET_ATTR_MODE 0x1
+#define GFAPI_SET_ATTR_UID 0x2
+#define GFAPI_SET_ATTR_GID 0x4
+#define GFAPI_SET_ATTR_SIZE 0x8
+#define GFAPI_SET_ATTR_ATIME 0x10
+#define GFAPI_SET_ATTR_MTIME 0x20
+
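+/* For example (an illustrative sketch only, assuming an existing 'fs' and
+ * 'object'), changing the owner and the mode of an object in one call:
+ *
+ * struct stat st = {0, };
+ * st.st_uid = 1000;
+ * st.st_mode = 0644;
+ * glfs_h_setattrs (fs, object, &st,
+ * GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_MODE);
+ *
+ * Only the fields whose GFAPI_SET_ATTR_* bit is present in 'valid' are read
+ * from the stat structure; all other fields are ignored.
+ */
+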
+/* Length of the opaque handle filled in by glfs_h_extract_handle and
+ * consumed by glfs_h_create_from_handle */
+#define GFAPI_HANDLE_LENGTH 16
+
+__BEGIN_DECLS
+
+/*
+ * Notes:
+ *
+ * The file object handle. One per looked up, created file/directory
+ *
+ * This had been introduced to facilitate gfid/inode based gfapi
+ * - a requirement introduced by nfs-ganesha
+ */
+struct glfs_object;
+typedef struct glfs_object glfs_object_t;
+
+/* Handle based operations */
+/* Operations that generate handles */
+struct glfs_object *glfs_h_lookupat (struct glfs *fs,
+ struct glfs_object *parent,
+ const char *path, struct stat *stat);
+
+struct glfs_object *glfs_h_creat (struct glfs *fs, struct glfs_object *parent,
+ const char *path, int flags, mode_t mode,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent,
+ const char *path, mode_t flags,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_mknod (struct glfs *fs, struct glfs_object *parent,
+ const char *path, mode_t mode, dev_t dev,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_symlink (struct glfs *fs, struct glfs_object *parent,
+ const char *name, const char *data,
+ struct stat *stat);
+
+/* Operations on the actual objects */
+int glfs_h_unlink (struct glfs *fs, struct glfs_object *parent,
+ const char *path);
+
+int glfs_h_close (struct glfs_object *object);
+
+int glfs_caller_specific_init (void *uid_caller_key, void *gid_caller_key,
+ void *future);
+
+int glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset);
+
+int glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat);
+
+int glfs_h_getattrs (struct glfs *fs, struct glfs_object *object,
+ struct stat *stat);
+
+int glfs_h_setattrs (struct glfs *fs, struct glfs_object *object,
+ struct stat *sb, int valid);
+
+int glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz);
+
+int glfs_h_link (struct glfs *fs, struct glfs_object *linktgt,
+ struct glfs_object *parent, const char *name);
+
+int glfs_h_rename (struct glfs *fs, struct glfs_object *olddir,
+ const char *oldname, struct glfs_object *newdir,
+ const char *newname);
+
+/* Operations enabling opaque invariant handle to object transitions */
+ssize_t glfs_h_extract_handle (struct glfs_object *object,
+ unsigned char *handle, int len);
+
+struct glfs_object *glfs_h_create_from_handle (struct glfs *fs,
+ unsigned char *handle, int len,
+ struct stat *stat);
+
+/* Operations enabling object handles to fd transitions */
+struct glfs_fd *glfs_h_opendir (struct glfs *fs, struct glfs_object *object);
+
+struct glfs_fd *glfs_h_open (struct glfs *fs, struct glfs_object *object,
+ int flags);
+
+__END_DECLS
+
+#endif /* !_GLFS_HANDLES_H */
\ No newline at end of file
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
new file mode 100644
index 000000000..ec1d5579d
--- /dev/null
+++ b/api/src/glfs-internal.h
@@ -0,0 +1,200 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _GLFS_INTERNAL_H
+#define _GLFS_INTERNAL_H
+
+#include "xlator.h"
+
+#define GLFS_SYMLINK_MAX_FOLLOW 2048
+
+#define DEFAULT_REVAL_COUNT 1
+
+#define ESTALE_RETRY(ret,errno,reval,loc,label) do { \
+ if (ret == -1 && errno == ESTALE) { \
+ if (reval < DEFAULT_REVAL_COUNT) { \
+ reval++; \
+ loc_wipe (loc); \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
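+/* An illustrative sketch of how a path-based caller might use ESTALE_RETRY
+ ('fs', 'subvol' and 'path' are assumed to exist in the caller): attempt the
+ resolution (or a subsequent syncop) and jump back to the retry label when it
+ fails with ESTALE, at most DEFAULT_REVAL_COUNT additional times.
+
+ int reval = 0;
+ int ret = -1;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+ */
+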
+#define GLFS_LOC_FILL_INODE(oinode, loc, label) do { \
+ loc.inode = inode_ref (oinode); \
+ uuid_copy (loc.gfid, oinode->gfid); \
+ ret = glfs_loc_touchup (&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_PINODE(pinode, loc, ret, errno, label, path) do { \
+ loc.inode = inode_new (pinode->table); \
+ if (!loc.inode) { \
+ ret = -1; \
+ errno = ENOMEM; \
+ goto label; \
+ } \
+ loc.parent = inode_ref (pinode); \
+ loc.name = path; \
+ ret = glfs_loc_touchup (&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+struct glfs;
+
+typedef int (*glfs_init_cbk) (struct glfs *fs, int ret);
+
+struct glfs {
+ char *volname;
+
+ glusterfs_ctx_t *ctx;
+
+ pthread_t poller;
+
+ glfs_init_cbk init_cbk;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int init;
+ int ret;
+ int err;
+
+ xlator_t *active_subvol;
+ xlator_t *next_subvol;
+ xlator_t *old_subvol;
+
+ char *oldvolfile;
+ ssize_t oldvollen;
+
+ inode_t *cwd;
+
+ uint32_t dev_id; /* Used to fill st_dev in struct stat */
+
+ struct list_head openfds;
+
+ gf_boolean_t migration_in_progress;
+};
+
+struct glfs_fd {
+ struct list_head openfds;
+ struct glfs *fs;
+ off_t offset;
+ fd_t *fd; /* Currently guarded by @fs->mutex. TODO: per-glfd lock */
+ struct list_head entries;
+ gf_dirent_t *next;
+ struct dirent *readdirbuf;
+};
+
+/* glfs object handle introduced for the alternate gfapi implementation based
+ on glfs handles/gfid/inode
+*/
+struct glfs_object {
+ inode_t *inode;
+ uuid_t gfid;
+};
+
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+int glfs_mgmt_init (struct glfs *fs);
+void glfs_init_done (struct glfs *fs, int ret);
+int glfs_process_volfp (struct glfs *fs, FILE *fp);
+int glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+int glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+fd_t *glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+fd_t *__glfs_migrate_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+int glfs_first_lookup (xlator_t *subvol);
+
+static inline void
+__glfs_entry_fs (struct glfs *fs)
+{
+ THIS = fs->ctx->master;
+}
+
+
+static inline void
+__glfs_entry_fd (struct glfs_fd *fd)
+{
+ THIS = fd->fd->inode->table->xl->ctx->master;
+}
+
+
+/*
+ By default all lock attempts from user context must
+ use glfs_lock() and glfs_unlock(). This allows
+ for a safe implementation of graph migration where
+ we can give up the mutex during syncop calls so
+ that bottom up calls (particularly CHILD_UP notify)
+ can do a mutex_lock() on @glfs without deadlocking
+ the filesystem
+*/
+static inline int
+glfs_lock (struct glfs *fs)
+{
+ pthread_mutex_lock (&fs->mutex);
+
+ while (!fs->init)
+ pthread_cond_wait (&fs->cond, &fs->mutex);
+
+ while (fs->migration_in_progress)
+ pthread_cond_wait (&fs->cond, &fs->mutex);
+
+ return 0;
+}
+
+
+static inline void
+glfs_unlock (struct glfs *fs)
+{
+ pthread_mutex_unlock (&fs->mutex);
+}
+
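+/* A sketch of the intended calling convention (illustrative only; 'fs' and
+ 'subvol' are assumed to be in scope): code running in user/application
+ context brackets access to the graph state with glfs_lock()/glfs_unlock(),
+ for example
+
+ glfs_lock (fs);
+ {
+ subvol = __glfs_active_subvol (fs);
+ }
+ glfs_unlock (fs);
+
+ whereas bottom-up notifications (e.g. glfs_graph_setup()) take fs->mutex
+ directly, as described above.
+*/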
+
+void glfs_fd_destroy (struct glfs_fd *glfd);
+
+struct glfs_fd *glfs_fd_new (struct glfs *fs);
+void glfs_fd_bind (struct glfs_fd *glfd);
+
+xlator_t * glfs_active_subvol (struct glfs *fs);
+xlator_t * __glfs_active_subvol (struct glfs *fs);
+void glfs_subvol_done (struct glfs *fs, xlator_t *subvol);
+
+inode_t * glfs_refresh_inode (xlator_t *subvol, inode_t *inode);
+
+inode_t *glfs_cwd_get (struct glfs *fs);
+int glfs_cwd_set (struct glfs *fs, inode_t *inode);
+inode_t *glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object);
+int glfs_create_object (loc_t *loc, struct glfs_object **retobject);
+int __glfs_cwd_set (struct glfs *fs, inode_t *inode);
+
+int glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt);
+int glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval);
+int glfs_loc_touchup (loc_t *loc);
+void glfs_iatt_to_stat (struct glfs *fs, struct iatt *iatt, struct stat *stat);
+int glfs_loc_link (loc_t *loc, struct iatt *iatt);
+int glfs_loc_unlink (loc_t *loc);
+
+#endif /* !_GLFS_INTERNAL_H */
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
new file mode 100644
index 000000000..c02534c18
--- /dev/null
+++ b/api/src/glfs-master.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+
+
+int
+glfs_graph_setup (struct glfs *fs, glusterfs_graph_t *graph)
+{
+ xlator_t *new_subvol = NULL;
+ xlator_t *old_subvol = NULL;
+ inode_table_t *itable = NULL;
+ int ret = -1;
+
+ new_subvol = graph->top;
+
+ /* This is called in a bottom-up context, it should specifically
+ NOT be glfs_lock()
+ */
+ pthread_mutex_lock (&fs->mutex);
+ {
+ if (new_subvol->switched ||
+ new_subvol == fs->active_subvol ||
+ new_subvol == fs->next_subvol) {
+ /* Spurious CHILD_UP event on old graph */
+ ret = 0;
+ goto unlock;
+ }
+
+ if (!new_subvol->itable) {
+ itable = inode_table_new (131072, new_subvol);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ new_subvol->itable = itable;
+ }
+
+ old_subvol = fs->next_subvol;
+ fs->next_subvol = new_subvol;
+ fs->next_subvol->winds++; /* first ref */
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock (&fs->mutex);
+
+ if (old_subvol)
+ /* wasn't picked up so far, skip */
+ glfs_subvol_done (fs, old_subvol);
+
+ return ret;
+}
+
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ glusterfs_graph_t *graph = NULL;
+ struct glfs *fs = NULL;
+
+ graph = data;
+ fs = this->private;
+
+ switch (event) {
+ case GF_EVENT_GRAPH_NEW:
+ gf_log (this->name, GF_LOG_INFO, "New graph %s (%d) coming up",
+ uuid_utoa ((unsigned char *)graph->graph_uuid),
+ graph->id);
+ break;
+ case GF_EVENT_CHILD_UP:
+ glfs_graph_setup (fs, graph);
+ glfs_init_done (fs, 0);
+ break;
+ case GF_EVENT_CHILD_DOWN:
+ glfs_graph_setup (fs, graph);
+ glfs_init_done (fs, 1);
+ break;
+ case GF_EVENT_CHILD_CONNECTING:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got notify event %d", event);
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, glfs_mt_end + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initialise "
+ "memory accounting");
+ return ret;
+ }
+
+ return 0;
+}
+
+
+int
+init (xlator_t *this)
+{
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+
+}
+
+
+struct xlator_dumpops dumpops;
+
+
+struct xlator_fops fops;
+
+
+struct xlator_cbks cbks;
diff --git a/api/src/glfs-mem-types.h b/api/src/glfs-mem-types.h
new file mode 100644
index 000000000..3301b3da5
--- /dev/null
+++ b/api/src/glfs-mem-types.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MEM_TYPES_H
+#define _GLFS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum glfs_mem_types_ {
+ glfs_mt_glfs_t = GF_MEM_TYPE_START,
+ glfs_mt_call_pool_t,
+ glfs_mt_xlator_t,
+ glfs_mt_glfs_fd_t,
+ glfs_mt_glfs_io_t,
+ glfs_mt_volfile_t,
+ glfs_mt_xlator_cmdline_option_t,
+ glfs_mt_glfs_object_t,
+ glfs_mt_readdirbuf_t,
+ glfs_mt_end
+
+};
+#endif
+
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
new file mode 100644
index 000000000..6843e9cb3
--- /dev/null
+++ b/api/src/glfs-mgmt.c
@@ -0,0 +1,543 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "glusterfs.h"
+#include "stack.h"
+#include "dict.h"
+#include "event.h"
+#include "defaults.h"
+
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+
+#include "syncop.h"
+#include "xlator.h"
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+
+
+int glfs_volfile_fetch (struct glfs *fs);
+
+int
+glfs_process_volfp (struct glfs *fs, FILE *fp)
+{
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ graph = glusterfs_graph_construct (fp);
+ if (!graph) {
+ gf_log ("glfs", GF_LOG_ERROR, "failed to construct the graph");
+ goto out;
+ }
+
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp (trav->type, "mount/fuse") == 0) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "fuse xlator cannot be specified "
+ "in volume file");
+ goto out;
+ }
+ }
+
+ ret = glusterfs_graph_prepare (graph, ctx);
+ if (ret) {
+ glusterfs_graph_destroy (graph);
+ goto out;
+ }
+
+ ret = glusterfs_graph_activate (graph, ctx);
+
+ if (ret) {
+ glusterfs_graph_destroy (graph);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose (fp);
+
+ if (!ctx->active) {
+ ret = -1;
+ }
+
+ return ret;
+}
+
+
+int
+mgmt_cbk_spec (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ struct glfs *fs = NULL;
+ xlator_t *this = NULL;
+
+ this = mydata;
+ fs = this->private;
+
+ glfs_volfile_fetch (fs);
+
+ return 0;
+}
+
+
+int
+mgmt_cbk_event (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ return 0;
+}
+
+
+rpcclnt_cb_actor_t mgmt_cbk_actors[] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
+ mgmt_cbk_event},
+};
+
+
+struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+};
+
+rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+
+int
+mgmt_submit_request (void *req, call_frame_t *frame,
+ glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+
+ iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (iobref)
+ iobref_unref (iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+ return ret;
+}
+
+
+static int
+glusterfs_oldvolfile_update (struct glfs *fs, char *volfile, ssize_t size)
+{
+ int ret = -1;
+
+ fs->oldvollen = size;
+ if (!fs->oldvolfile) {
+ fs->oldvolfile = GF_CALLOC (1, size+1, glfs_mt_volfile_t);
+ } else {
+ fs->oldvolfile = GF_REALLOC (fs->oldvolfile, size+1);
+ }
+
+ if (!fs->oldvolfile) {
+ fs->oldvollen = 0;
+ } else {
+ memcpy (fs->oldvolfile, volfile, size);
+ fs->oldvollen = size;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+
+int
+mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ int need_retry = 0;
+ struct glfs *fs = NULL;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ need_retry = 1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to get the 'volume file' from server");
+ ret = -1;
+ errno = rsp.op_errno;
+ goto out;
+ }
+
+ ret = 0;
+ size = rsp.op_ret;
+
+ if ((size == fs->oldvollen) &&
+ (memcmp (fs->oldvolfile, rsp.spec, size) == 0)) {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "No change in volfile, continuing");
+ goto out;
+ }
+
+ tmpfp = tmpfile ();
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
+ }
+
+ fwrite (rsp.spec, size, 1, tmpfp);
+ fflush (tmpfp);
+ if (ferror (tmpfp)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if only options have changed. There is no need to reload the
+ * volfile if the topology hasn't changed.
+ * glusterfs_volfile_reconfigure has 3 possible return states:
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the
+ * xlators have to be initialized
+ * return -1 (or -ve) =======> some internal error occurred during the operation
+ */
+
+ ret = glusterfs_volfile_reconfigure (fs->oldvollen, tmpfp, fs->ctx,
+ fs->oldvolfile);
+ if (ret == 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "No need to re-load volfile, reconfigure done");
+ ret = glusterfs_oldvolfile_update (fs, rsp.spec, size);
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Reconfigure failed !!");
+ goto out;
+ }
+
+ ret = glfs_process_volfp (fs, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ if (ret)
+ goto out;
+
+ ret = glusterfs_oldvolfile_update (fs, rsp.spec, size);
+out:
+ STACK_DESTROY (frame->root);
+
+ if (rsp.spec)
+ free (rsp.spec);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_log ("mgmt", GF_LOG_ERROR, "Server is operating at an "
+ "op-version which is not supported");
+ errno = ENOTSUP;
+ glfs_init_done (fs, -1);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ if (!need_retry) {
+ if (!errno)
+ errno = EINVAL;
+ glfs_init_done (fs, -1);
+ }
+ }
+
+ if (tmpfp)
+ fclose (tmpfp);
+
+ return 0;
+}
+
+
+int
+glfs_volfile_fetch (struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {0, };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ frame = create_frame (THIS, ctx->pool);
+
+ req.key = cmd_args->volfile_id;
+ req.flags = 0;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ goto out;
+ }
+
+ ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
+out:
+ return ret;
+}
+
+
+static int
+mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct glfs *fs = NULL;
+ int ret = 0;
+
+ this = mydata;
+ ctx = this->ctx;
+ fs = ((xlator_t *)ctx->master)->private;
+ cmd_args = &ctx->cmd_args;
+
+ switch (event) {
+ case RPC_CLNT_DISCONNECT:
+ if (!ctx->active) {
+ cmd_args->max_connect_attempts--;
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to connect with remote-host: %s",
+ strerror (errno));
+ gf_log ("glfs-mgmt", GF_LOG_INFO,
+ "%d connect attempts left",
+ cmd_args->max_connect_attempts);
+ if (0 >= cmd_args->max_connect_attempts) {
+ errno = ENOTCONN;
+ glfs_init_done (fs, -1);
+ }
+ }
+ break;
+ case RPC_CLNT_CONNECT:
+ rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
+
+ ret = glfs_volfile_fetch (fs);
+ if (ret && ctx && (ctx->active == NULL)) {
+ /* Do it only for the first time */
+ /* Exit the process.. there are some wrong options */
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ errno = EINVAL;
+ glfs_init_done (fs, -1);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+glusterfs_mgmt_notify (int32_t op, void *data, ...)
+{
+ int ret = 0;
+
+ switch (op)
+ {
+ case GF_EN_DEFRAG_STATUS:
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+
+int
+glfs_mgmt_init (struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (ctx->mgmt)
+ return 0;
+
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
+
+ host = "localhost";
+ if (cmd_args->volfile_server)
+ host = cmd_args->volfile_server;
+
+ ret = rpc_transport_inet_options_build (&options, host, port);
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new (options, THIS->ctx, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to create rpc clnt");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register callback function");
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start (rpc);
+out:
+ return ret;
+}
+
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
new file mode 100644
index 000000000..4ca2eb6fc
--- /dev/null
+++ b/api/src/glfs-resolve.c
@@ -0,0 +1,969 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "stack.h"
+#include "event.h"
+#include "glfs-mem-types.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "call-stub.h"
+
+#include "glfs-internal.h"
+
+#define graphid_str(subvol) (uuid_utoa((unsigned char *)subvol->graph->graph_uuid))
+
+
+int
+glfs_first_lookup_safe (xlator_t *subvol)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+
+ loc.inode = subvol->itable->root;
+ memset (loc.gfid, 0, 16);
+ loc.gfid[15] = 1;
+ loc.path = "/";
+ loc.name = "";
+
+ ret = syncop_lookup (subvol, &loc, 0, 0, 0, 0);
+
+ gf_log (subvol->name, GF_LOG_DEBUG, "first lookup complete %d", ret);
+
+ return ret;
+}
+
+
+int
+__glfs_first_lookup (struct glfs *fs, xlator_t *subvol)
+{
+ int ret = -1;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ ret = glfs_first_lookup_safe (subvol);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return ret;
+}
+
+
+inode_t *
+glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ struct iatt iatt = {0, };
+ inode_t *newinode = NULL;
+
+
+ if (!oldinode)
+ return NULL;
+
+ if (oldinode->table->xl == subvol)
+ return inode_ref (oldinode);
+
+ newinode = inode_find (subvol->itable, oldinode->gfid);
+ if (newinode)
+ return newinode;
+
+ uuid_copy (loc.gfid, oldinode->gfid);
+ loc.inode = inode_new (subvol->itable);
+ if (!loc.inode)
+ return NULL;
+
+ ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0);
+
+ if (ret) {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode refresh of %s failed: %s",
+ uuid_utoa (oldinode->gfid), strerror (errno));
+ loc_wipe (&loc);
+ return NULL;
+ }
+
+ newinode = inode_link (loc.inode, 0, 0, &iatt);
+ if (newinode)
+ inode_lookup (newinode);
+
+ loc_wipe (&loc);
+
+ return newinode;
+}
+
+
+inode_t *
+__glfs_refresh_inode (struct glfs *fs, xlator_t *subvol, inode_t *inode)
+{
+ inode_t *newinode = NULL;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ newinode = glfs_refresh_inode_safe (subvol, inode);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return newinode;
+}
+
+int
+glfs_loc_touchup (loc_t *loc)
+{
+ char *path = NULL;
+ int ret = -1;
+ char *bn = NULL;
+
+ if (loc->parent)
+ ret = inode_path (loc->parent, loc->name, &path);
+ else
+ ret = inode_path (loc->inode, 0, &path);
+
+ loc->path = path;
+
+ if (ret < 0 || !path) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ bn = strrchr (path, '/');
+ if (bn)
+ bn++;
+ loc->name = bn;
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+glfs_resolve_symlink (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ char **lpath)
+{
+ loc_t loc = {0, };
+ char *path = NULL;
+ char *rpath = NULL;
+ int ret = -1;
+
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+ ret = inode_path (inode, NULL, &rpath);
+ if (ret < 0)
+ goto out;
+ loc.path = rpath;
+
+ ret = syncop_readlink (subvol, &loc, &path, 4096);
+
+ if (ret < 0)
+ goto out;
+
+ if (lpath)
+ *lpath = path;
+out:
+ loc_wipe (&loc);
+ return ret;
+}
+
+
+int
+glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ char *path = NULL;
+
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+
+ ret = inode_path (loc.inode, NULL, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup (subvol, &loc, NULL, iatt, NULL, NULL);
+out:
+ loc_wipe (&loc);
+
+ return ret;
+}
+
+
+inode_t *
+glfs_resolve_component (struct glfs *fs, xlator_t *subvol, inode_t *parent,
+ const char *component, struct iatt *iatt,
+ int force_lookup)
+{
+ loc_t loc = {0, };
+ inode_t *inode = NULL;
+ int reval = 0;
+ int ret = -1;
+ int glret = -1;
+ struct iatt ciatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+
+ loc.name = component;
+
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, parent->gfid);
+
+
+ if (strcmp (component, ".") == 0)
+ loc.inode = inode_ref (parent);
+ else if (strcmp (component, "..") == 0)
+ loc.inode = inode_parent (parent, 0, 0);
+ else
+ loc.inode = inode_grep (parent->table, parent, component);
+
+ if (loc.inode) {
+ uuid_copy (loc.gfid, loc.inode->gfid);
+ reval = 1;
+
+ if (!force_lookup) {
+ inode = inode_ref (loc.inode);
+ ciatt.ia_type = inode->ia_type;
+ goto found;
+ }
+ } else {
+ uuid_generate (gfid);
+ loc.inode = inode_new (parent->table);
+ }
+
+ if (!loc.inode)
+ goto out;
+
+ glret = glfs_loc_touchup (&loc);
+ if (glret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup (subvol, &loc, NULL, &ciatt, NULL, NULL);
+ if (ret && reval) {
+ inode_unref (loc.inode);
+ loc.inode = inode_new (parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_lookup (subvol, &loc, xattr_req, &ciatt,
+ NULL, NULL);
+ }
+ if (ret)
+ goto out;
+
+ inode = inode_link (loc.inode, loc.parent, component, &ciatt);
+found:
+ if (inode)
+ inode_lookup (inode);
+ if (iatt)
+ *iatt = ciatt;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ loc_wipe (&loc);
+
+ return inode;
+}
+
+
+int
+glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ char *saveptr = NULL;
+ char *path = NULL;
+ char *component = NULL;
+ char *next_component = NULL;
+ int ret = -1;
+ struct iatt ciatt = {0, };
+
+ path = gf_strdup (origpath);
+ if (!path) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ parent = NULL;
+ if (at && path[0] != '/') {
+ /* A relative resolution of a path which starts with '/'
+ is equal to an absolute path resolution.
+ */
+ inode = inode_ref (at);
+ } else {
+ inode = inode_ref (subvol->itable->root);
+
+ if (strcmp (path, "/") == 0)
+ glfs_resolve_base (fs, subvol, inode, &ciatt);
+ }
+
+ for (component = strtok_r (path, "/", &saveptr);
+ component; component = next_component) {
+
+ next_component = strtok_r (NULL, "/", &saveptr);
+
+ if (parent)
+ inode_unref (parent);
+
+ parent = inode;
+
+ inode = glfs_resolve_component (fs, subvol, parent,
+ component, &ciatt,
+ /* force hard lookup on the last
+ component, as the caller
+ wants proper iatt filled
+ */
+ (reval || (!next_component &&
+ iatt)));
+ if (!inode)
+ break;
+
+ if (IA_ISLNK (ciatt.ia_type) && (next_component || follow)) {
+ /* If the component is not the last piece,
+ then following it is necessary even if
+ not requested by the caller
+ */
+ char *lpath = NULL;
+ loc_t sym_loc = {0,};
+
+ if (follow > GLFS_SYMLINK_MAX_FOLLOW) {
+ errno = ELOOP;
+ ret = -1;
+ if (inode) {
+ inode_unref (inode);
+ inode = NULL;
+ }
+ break;
+ }
+
+ ret = glfs_resolve_symlink (fs, subvol, inode, &lpath);
+ inode_unref (inode);
+ inode = NULL;
+ if (ret < 0)
+ break;
+
+ ret = glfs_resolve_at (fs, subvol, parent, lpath,
+ &sym_loc,
+ /* followed iatt becomes the
+ component iatt
+ */
+ &ciatt,
+ /* always recursively follow while
+ following symlink
+ */
+ follow + 1, reval);
+ if (ret == 0)
+ inode = inode_ref (sym_loc.inode);
+ loc_wipe (&sym_loc);
+ GF_FREE (lpath);
+ }
+
+ if (!next_component)
+ break;
+
+ if (!IA_ISDIR (ciatt.ia_type)) {
+ /* next_component exists and this component is
+ not a directory
+ */
+ inode_unref (inode);
+ inode = NULL;
+ ret = -1;
+ errno = ENOTDIR;
+ break;
+ }
+ }
+
+ if (parent && next_component)
+ /* resolution failed mid-way */
+ goto out;
+
+ /* At this point, all components up to the last parent directory
+ have been resolved successfully (@parent). Resolution of basename
+ might have failed (@inode) if at all.
+ */
+
+ loc->parent = parent;
+ if (parent) {
+ uuid_copy (loc->pargfid, parent->gfid);
+ loc->name = component;
+ }
+
+ loc->inode = inode;
+ if (inode) {
+ uuid_copy (loc->gfid, inode->gfid);
+ if (iatt)
+ *iatt = ciatt;
+ ret = 0;
+ }
+
+ glfs_loc_touchup (loc);
+out:
+ GF_FREE (path);
+
+ /* do NOT loc_wipe here as only last component might be missing */
+
+ return ret;
+}
+
+
+int
+glfs_resolve_path (struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int follow, int reval)
+{
+ int ret = -1;
+ inode_t *cwd = NULL;
+
+ if (origpath[0] == '/')
+ return glfs_resolve_at (fs, subvol, NULL, origpath, loc, iatt,
+ follow, reval);
+
+ cwd = glfs_cwd_get (fs);
+
+ ret = glfs_resolve_at (fs, subvol, cwd, origpath, loc, iatt,
+ follow, reval);
+ if (cwd)
+ inode_unref (cwd);
+
+ return ret;
+}
+
+
+int
+glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path (fs, subvol, origpath, loc, iatt, 1, reval);
+
+ return ret;
+}
+
+
+int
+glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path (fs, subvol, origpath, loc, iatt, 0, reval);
+
+ return ret;
+}
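
As an illustrative sketch (the path here is hypothetical, and "fs" and "subvol" are assumed to be a valid virtual mount and its active subvolume), the follow flag above is what distinguishes the two wrappers: glfs_resolve() follows a trailing symlink, as stat(2) would, while glfs_lresolve() resolves the symlink itself, as lstat(2) would.

        loc_t loc = {0, };
        struct iatt iatt = {0, };

        /* follow a trailing symlink, as stat(2) would */
        if (glfs_resolve (fs, subvol, "/dir/link", &loc, &iatt, 0) == 0) {
                /* iatt now describes the symlink's target */
        }
        loc_wipe (&loc);

        /* resolve the symlink itself, as lstat(2) would */
        if (glfs_lresolve (fs, subvol, "/dir/link", &loc, &iatt, 0) == 0) {
                /* iatt now describes the symlink itself */
        }
        loc_wipe (&loc);
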
+
+
+int
+glfs_migrate_fd_locks_safe (struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd,
+ xlator_t *newsubvol, fd_t *newfd)
+{
+ dict_t *lockinfo = NULL;
+ int ret = 0;
+ char uuid1[64];
+
+ if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx))
+ return 0;
+
+ newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx);
+
+ ret = syncop_fgetxattr (oldsubvol, oldfd, &lockinfo,
+ GF_XATTR_LOCKINFO_KEY);
+ if (ret < 0) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fgetxattr (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (oldfd->inode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (oldsubvol), oldsubvol->graph->id);
+ goto out;
+ }
+
+ if (!dict_get (lockinfo, GF_XATTR_LOCKINFO_KEY)) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "missing lokinfo key (%s) on graph %s (%d)",
+ uuid_utoa_r (oldfd->inode->gfid, uuid1),
+ graphid_str (oldsubvol), oldsubvol->graph->id);
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (newsubvol, newfd, lockinfo, 0);
+ if (ret < 0) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fsetxattr (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (newfd->inode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+out:
+ if (lockinfo)
+ dict_unref (lockinfo);
+ return ret;
+}
+
+
+fd_t *
+glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+{
+ fd_t *newfd = NULL;
+ inode_t *oldinode = NULL;
+ inode_t *newinode = NULL;
+ xlator_t *oldsubvol = NULL;
+ int ret = -1;
+ loc_t loc = {0, };
+ char uuid1[64];
+
+
+ oldinode = oldfd->inode;
+ oldsubvol = oldinode->table->xl;
+
+ if (oldsubvol == newsubvol)
+ return fd_ref (oldfd);
+
+ if (!oldsubvol->switched) {
+ ret = syncop_fsync (oldsubvol, oldfd, 0);
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fsync() failed (%s) on %s graph %s (%d)",
+ strerror (errno),
+ uuid_utoa_r (oldfd->inode->gfid, uuid1),
+ graphid_str (oldsubvol), oldsubvol->graph->id);
+ }
+ }
+
+ newinode = glfs_refresh_inode_safe (newsubvol, oldinode);
+ if (!newinode) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "inode (%s) refresh failed (%s) on graph %s (%d)",
+ uuid_utoa_r (oldinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ newfd = fd_create (newinode, getpid());
+ if (!newfd) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fd_create (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (newinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ loc.inode = inode_ref (newinode);
+
+ ret = inode_path (oldfd->inode, NULL, (char **)&loc.path);
+ if (ret < 0) {
+ gf_log (fs->volname, GF_LOG_INFO, "inode_path failed");
+ goto out;
+ }
+
+ uuid_copy (loc.gfid, oldinode->gfid);
+
+
+ if (IA_ISDIR (oldinode->ia_type))
+ ret = syncop_opendir (newsubvol, &loc, newfd);
+ else
+ ret = syncop_open (newsubvol, &loc,
+ oldfd->flags & ~(O_TRUNC|O_EXCL|O_CREAT),
+ newfd);
+ loc_wipe (&loc);
+
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "syncop_open%s (%s) failed (%s) on graph %s (%d)",
+ IA_ISDIR (oldinode->ia_type) ? "dir" : "",
+ uuid_utoa_r (newinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ ret = glfs_migrate_fd_locks_safe (fs, oldsubvol, oldfd, newsubvol,
+ newfd);
+
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "lock migration (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (newinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ newfd->flags = oldfd->flags;
+ fd_bind (newfd);
+out:
+ if (newinode)
+ inode_unref (newinode);
+
+ if (ret) {
+ fd_unref (newfd);
+ newfd = NULL;
+ }
+
+ return newfd;
+}
+
+
+fd_t *
+__glfs_migrate_fd (struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
+{
+ fd_t *oldfd = NULL;
+ fd_t *newfd = NULL;
+
+ oldfd = glfd->fd;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ newfd = glfs_migrate_fd_safe (fs, newsubvol, oldfd);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return newfd;
+}
+
+
+fd_t *
+__glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ if (glfd->fd->inode->table->xl == subvol)
+ return fd_ref (glfd->fd);
+
+ fd = __glfs_migrate_fd (fs, subvol, glfd);
+ if (!fd)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ fd_unref (glfd->fd);
+ glfd->fd = fd_ref (fd);
+ }
+
+ return fd;
+}
+
+
+fd_t *
+glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ glfs_lock (fs);
+ {
+ fd = __glfs_resolve_fd (fs, subvol, glfd);
+ }
+ glfs_unlock (fs);
+
+ return fd;
+}
+
+
+void
+__glfs_migrate_openfds (struct glfs *fs, xlator_t *subvol)
+{
+ struct glfs_fd *glfd = NULL;
+ fd_t *fd = NULL;
+
+ list_for_each_entry (glfd, &fs->openfds, openfds) {
+ if (uuid_is_null (glfd->fd->inode->gfid)) {
+ gf_log (fs->volname, GF_LOG_INFO,
+ "skipping openfd %p/%p in graph %s (%d)",
+ glfd, glfd->fd, graphid_str(subvol),
+ subvol->graph->id);
+ /* create in progress, defer */
+ continue;
+ }
+
+ fd = __glfs_migrate_fd (fs, subvol, glfd);
+ if (fd) {
+ fd_unref (glfd->fd);
+ glfd->fd = fd;
+ }
+ }
+}
+
+
+xlator_t *
+__glfs_active_subvol (struct glfs *fs)
+{
+ xlator_t *new_subvol = NULL;
+ int ret = -1;
+ inode_t *new_cwd = NULL;
+
+ if (!fs->next_subvol)
+ return fs->active_subvol;
+
+ new_subvol = fs->next_subvol;
+
+ ret = __glfs_first_lookup (fs, new_subvol);
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_INFO,
+ "first lookup on graph %s (%d) failed (%s)",
+ graphid_str (new_subvol), new_subvol->graph->id,
+ strerror (errno));
+ return NULL;
+ }
+
+ if (fs->cwd) {
+ new_cwd = __glfs_refresh_inode (fs, new_subvol, fs->cwd);
+
+ if (!new_cwd) {
+ char buf1[64];
+ gf_log (fs->volname, GF_LOG_INFO,
+ "cwd refresh of %s graph %s (%d) failed (%s)",
+ uuid_utoa_r (fs->cwd->gfid, buf1),
+ graphid_str (new_subvol),
+ new_subvol->graph->id, strerror (errno));
+ return NULL;
+ }
+ }
+
+ __glfs_migrate_openfds (fs, new_subvol);
+
+ /* switching @active_subvol and @cwd
+ should be atomic
+ */
+ fs->old_subvol = fs->active_subvol;
+ fs->active_subvol = fs->next_subvol;
+ fs->next_subvol = NULL;
+
+ if (new_cwd) {
+ __glfs_cwd_set (fs, new_cwd);
+ inode_unref (new_cwd);
+ }
+
+ gf_log (fs->volname, GF_LOG_INFO, "switched to graph %s (%d)",
+ graphid_str (new_subvol), new_subvol->graph->id);
+
+ return new_subvol;
+}
+
+xlator_t *
+glfs_active_subvol (struct glfs *fs)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *old_subvol = NULL;
+
+ glfs_lock (fs);
+ {
+ subvol = __glfs_active_subvol (fs);
+
+ if (subvol)
+ subvol->winds++;
+
+ if (fs->old_subvol) {
+ old_subvol = fs->old_subvol;
+ fs->old_subvol = NULL;
+ old_subvol->switched = 1;
+ }
+ }
+ glfs_unlock (fs);
+
+ if (old_subvol)
+ glfs_subvol_done (fs, old_subvol);
+
+ return subvol;
+}
+
+
+void
+glfs_subvol_done (struct glfs *fs, xlator_t *subvol)
+{
+ int ref = 0;
+ xlator_t *active_subvol = NULL;
+
+ if (!subvol)
+ return;
+
+ glfs_lock (fs);
+ {
+ ref = (--subvol->winds);
+ active_subvol = fs->active_subvol;
+ }
+ glfs_unlock (fs);
+
+ if (ref == 0) {
+ assert (subvol != active_subvol);
+ xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, NULL);
+ }
+}
+
+
+int
+__glfs_cwd_set (struct glfs *fs, inode_t *inode)
+{
+ if (inode->table->xl != fs->active_subvol) {
+ inode = __glfs_refresh_inode (fs, fs->active_subvol, inode);
+ if (!inode)
+ return -1;
+ } else {
+ inode_ref (inode);
+ }
+
+ if (fs->cwd)
+ inode_unref (fs->cwd);
+
+ fs->cwd = inode;
+
+ return 0;
+}
+
+
+int
+glfs_cwd_set (struct glfs *fs, inode_t *inode)
+{
+ int ret = 0;
+
+ glfs_lock (fs);
+ {
+ ret = __glfs_cwd_set (fs, inode);
+ }
+ glfs_unlock (fs);
+
+ return ret;
+}
+
+
+inode_t *
+__glfs_cwd_get (struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ if (!fs->cwd)
+ return NULL;
+
+ if (fs->cwd->table->xl == fs->active_subvol) {
+ cwd = inode_ref (fs->cwd);
+ return cwd;
+ }
+
+ cwd = __glfs_refresh_inode (fs, fs->active_subvol, fs->cwd);
+
+ return cwd;
+}
+
+inode_t *
+glfs_cwd_get (struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ glfs_lock (fs);
+ {
+ cwd = __glfs_cwd_get (fs);
+ }
+ glfs_unlock (fs);
+
+ return cwd;
+}
+
+inode_t *
+__glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ if (object->inode->table->xl == subvol)
+ return inode_ref (object->inode);
+
+ inode = __glfs_refresh_inode (fs, fs->active_subvol,
+ object->inode);
+ if (!inode)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ inode_unref (object->inode);
+ object->inode = inode_ref (inode);
+ }
+
+ return inode;
+}
+
+inode_t *
+glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ glfs_lock (fs);
+ {
+ inode = __glfs_resolve_inode(fs, subvol, object);
+ }
+ glfs_unlock (fs);
+
+ return inode;
+}
+
+int
+glfs_create_object (loc_t *loc, struct glfs_object **retobject)
+{
+ struct glfs_object *object = NULL;
+
+ object = GF_CALLOC (1, sizeof(struct glfs_object),
+ glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ object->inode = loc->inode;
+ uuid_copy (object->gfid, object->inode->gfid);
+
+ /* we hold the reference */
+ loc->inode = NULL;
+
+ *retobject = object;
+
+ return 0;
+}
diff --git a/api/src/glfs.c b/api/src/glfs.c
new file mode 100644
index 000000000..29ed47c0c
--- /dev/null
+++ b/api/src/glfs.c
@@ -0,0 +1,673 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+/*
+ TODO:
+ - merge locks in glfs_posix_lock for lock self-healing
+ - set proper pid/lk_owner to call frames (currently buried in syncop)
+ - fix logging.c/h to store logfp and loglevel in glusterfs_ctx_t and
+ reach it via THIS.
+ - update syncop functions to accept/return xdata. ???
+ - protocol/client to reconnect immediately after portmap disconnect.
+ - handle SEEK_END failure in _lseek()
+ - handle umask (per filesystem?)
+ - make itables LRU based
+ - 0-copy for readv/writev
+ - reconcile the open/creat mess
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "stack.h"
+#include "event.h"
+#include "glfs-mem-types.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "call-stub.h"
+
+#include "glfs.h"
+#include "glfs-internal.h"
+#include "hashfn.h"
+#include "rpc-clnt.h"
+
+
+static gf_boolean_t
+vol_assigned (cmd_args_t *args)
+{
+ return args->volfile || args->volfile_server;
+}
+
+
+static int
+glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ xlator_mem_acct_init (THIS, glfs_mt_end + 1);
+
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
+ if (!ctx->process_uuid) {
+ goto err;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new ();
+ if (!ctx->iobuf_pool) {
+ goto err;
+ }
+
+ ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
+ if (!ctx->event_pool) {
+ goto err;
+ }
+
+ ctx->env = syncenv_new (0, 0, 0);
+ if (!ctx->env) {
+ goto err;
+ }
+
+ pool = GF_CALLOC (1, sizeof (call_pool_t),
+ glfs_mt_call_pool_t);
+ if (!pool) {
+ goto err;
+ }
+
+ /* frame_mem_pool size 112 * 4k */
+ pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
+ if (!pool->frame_mem_pool) {
+ goto err;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
+ if (!pool->stack_mem_pool) {
+ goto err;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ goto err;
+ }
+
+ ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto err;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto err;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto err;
+
+ INIT_LIST_HEAD (&pool->all_frames);
+ INIT_LIST_HEAD (&ctx->cmd_args.xlator_options);
+ LOCK_INIT (&pool->lock);
+ ctx->pool = pool;
+
+ pthread_mutex_init (&(ctx->lock), NULL);
+
+ ret = 0;
+err:
+ if (ret && pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy (pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy (pool->stack_mem_pool);
+ GF_FREE (pool);
+ }
+
+ if (ret && ctx) {
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy (ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy (ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy (ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy (ctx->dict_pair_pool);
+ }
+
+ return ret;
+}
+
+
+static int
+create_master (struct glfs *fs)
+{
+ int ret = 0;
+ xlator_t *master = NULL;
+
+ master = GF_CALLOC (1, sizeof (*master),
+ glfs_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup ("gfapi");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type (master, "mount/api") == -1) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "master xlator for %s initialization failed",
+ fs->volname);
+ goto err;
+ }
+
+ master->ctx = fs->ctx;
+ master->private = fs;
+ master->options = get_new_dict ();
+ if (!master->options)
+ goto err;
+
+
+ ret = xlator_init (master);
+ if (ret) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "failed to initialize gfapi translator");
+ goto err;
+ }
+
+ fs->ctx->master = master;
+ THIS = master;
+
+ return 0;
+
+err:
+ if (master) {
+ xlator_destroy (master);
+ }
+
+ return -1;
+}
+
+
+static FILE *
+get_volfp (struct glfs *fs)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
+ struct stat statbuf;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ ret = lstat (cmd_args->volfile, &statbuf);
+ if (ret == -1) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "%s: %s", cmd_args->volfile, strerror (errno));
+ return NULL;
+ }
+
+ if ((specfp = fopen (cmd_args->volfile, "r")) == NULL) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "volume file %s: %s",
+ cmd_args->volfile,
+ strerror (errno));
+ return NULL;
+ }
+
+ gf_log ("glfs", GF_LOG_DEBUG,
+ "loading volume file %s", cmd_args->volfile);
+
+ return specfp;
+}
+
+
+int
+glfs_volumes_init (struct glfs *fs)
+{
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (!vol_assigned (cmd_args))
+ return -1;
+
+ if (cmd_args->volfile_server) {
+ ret = glfs_mgmt_init (fs);
+ goto out;
+ }
+
+ fp = get_volfp (fs);
+
+ if (!fp) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "Cannot reach volume specification file");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_process_volfp (fs, fp);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+int
+glfs_set_xlator_option (struct glfs *fs, const char *xlator, const char *key,
+ const char *value)
+{
+ xlator_cmdline_option_t *option = NULL;
+
+ option = GF_CALLOC (1, sizeof (*option),
+ glfs_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto enomem;
+
+ INIT_LIST_HEAD (&option->cmd_args);
+
+ option->volume = gf_strdup (xlator);
+ if (!option->volume)
+ goto enomem;
+ option->key = gf_strdup (key);
+ if (!option->key)
+ goto enomem;
+ option->value = gf_strdup (value);
+ if (!option->value)
+ goto enomem;
+
+ list_add (&option->cmd_args, &fs->ctx->cmd_args.xlator_options);
+
+ return 0;
+enomem:
+ errno = ENOMEM;
+
+ if (!option)
+ return -1;
+
+ GF_FREE (option->volume);
+ GF_FREE (option->key);
+ GF_FREE (option->value);
+ GF_FREE (option);
+
+ return -1;
+}
+
+int glfs_setfsuid (uid_t fsuid)
+{
+ return syncopctx_setfsuid (&fsuid);
+}
+
+int glfs_setfsgid (gid_t fsgid)
+{
+ return syncopctx_setfsgid (&fsgid);
+}
+
+int glfs_setfsgroups (size_t size, const gid_t *list)
+{
+ return syncopctx_setfsgroups(size, list);
+}
+
+struct glfs *
+glfs_from_glfd (struct glfs_fd *glfd)
+{
+ return glfd->fs;
+}
+
+
+struct glfs_fd *
+glfs_fd_new (struct glfs *fs)
+{
+ struct glfs_fd *glfd = NULL;
+
+ glfd = GF_CALLOC (1, sizeof (*glfd), glfs_mt_glfs_fd_t);
+ if (!glfd)
+ return NULL;
+
+ glfd->fs = fs;
+
+ INIT_LIST_HEAD (&glfd->openfds);
+
+ return glfd;
+}
+
+
+void
+glfs_fd_bind (struct glfs_fd *glfd)
+{
+ struct glfs *fs = NULL;
+
+ fs = glfd->fs;
+
+ glfs_lock (fs);
+ {
+ list_add_tail (&glfd->openfds, &fs->openfds);
+ }
+ glfs_unlock (fs);
+}
+
+void
+glfs_fd_destroy (struct glfs_fd *glfd)
+{
+ if (!glfd)
+ return;
+
+ glfs_lock (glfd->fs);
+ {
+ list_del_init (&glfd->openfds);
+ }
+ glfs_unlock (glfd->fs);
+
+ if (glfd->fd)
+ fd_unref (glfd->fd);
+
+ GF_FREE (glfd->readdirbuf);
+
+ GF_FREE (glfd);
+}
+
+
+static void *
+glfs_poller (void *data)
+{
+ struct glfs *fs = NULL;
+
+ fs = data;
+
+ event_dispatch (fs->ctx->event_pool);
+
+ return NULL;
+}
+
+
+struct glfs *
+glfs_new (const char *volname)
+{
+ struct glfs *fs = NULL;
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx) {
+ return NULL;
+ }
+
+#ifdef DEBUG
+ gf_mem_acct_enable_set (ctx);
+#endif
+
+ /* first globals init, for gf_mem_acct_enable_set () */
+ ret = glusterfs_globals_init (ctx);
+ if (ret)
+ return NULL;
+
+ THIS->ctx = ctx;
+
+ /* then ctx_defaults_init, for xlator_mem_acct_init(THIS) */
+ ret = glusterfs_ctx_defaults_init (ctx);
+ if (ret)
+ return NULL;
+
+ fs = GF_CALLOC (1, sizeof (*fs), glfs_mt_glfs_t);
+ if (!fs)
+ return NULL;
+ fs->ctx = ctx;
+
+ glfs_set_logging (fs, "/dev/null", 0);
+
+ fs->ctx->cmd_args.volfile_id = gf_strdup (volname);
+
+ fs->volname = gf_strdup (volname);
+
+ pthread_mutex_init (&fs->mutex, NULL);
+ pthread_cond_init (&fs->cond, NULL);
+
+ INIT_LIST_HEAD (&fs->openfds);
+
+ return fs;
+}
+
+
+int
+glfs_set_volfile (struct glfs *fs, const char *volfile)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (vol_assigned (cmd_args))
+ return -1;
+
+ cmd_args->volfile = gf_strdup (volfile);
+
+ return 0;
+}
+
+
+int
+glfs_set_volfile_server (struct glfs *fs, const char *transport,
+ const char *host, int port)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (vol_assigned (cmd_args))
+ return -1;
+
+ cmd_args->volfile_server = gf_strdup (host);
+ cmd_args->volfile_server_transport = gf_strdup (transport);
+ cmd_args->volfile_server_port = port;
+ cmd_args->max_connect_attempts = 2;
+
+ return 0;
+}
+
+
+int
+glfs_set_logging (struct glfs *fs, const char *logfile, int loglevel)
+{
+ int ret = 0;
+ char *tmplog = NULL;
+
+ if (!logfile) {
+ ret = gf_set_log_file_path (&fs->ctx->cmd_args);
+ if (ret)
+ goto out;
+ tmplog = fs->ctx->cmd_args.log_file;
+ } else {
+ tmplog = (char *)logfile;
+ }
+
+ ret = gf_log_init (fs->ctx, tmplog, NULL);
+ if (ret)
+ goto out;
+
+ if (loglevel >= 0)
+ gf_log_set_loglevel (loglevel);
+
+out:
+ return ret;
+}
+
+
+int
+glfs_init_wait (struct glfs *fs)
+{
+ int ret = -1;
+
+ /* Always a top-down call, use glfs_lock() */
+ glfs_lock (fs);
+ {
+ while (!fs->init)
+ pthread_cond_wait (&fs->cond,
+ &fs->mutex);
+ ret = fs->ret;
+ errno = fs->err;
+ }
+ glfs_unlock (fs);
+
+ return ret;
+}
+
+
+void
+glfs_init_done (struct glfs *fs, int ret)
+{
+ glfs_init_cbk init_cbk;
+
+ if (!fs) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "fs is NULL");
+ goto out;
+ }
+
+ init_cbk = fs->init_cbk;
+
+ /* Always a bottom-up call, use mutex_lock() */
+ pthread_mutex_lock (&fs->mutex);
+ {
+ fs->init = 1;
+ fs->ret = ret;
+ fs->err = errno;
+
+ if (!init_cbk)
+ pthread_cond_broadcast (&fs->cond);
+ }
+ pthread_mutex_unlock (&fs->mutex);
+
+ if (init_cbk)
+ init_cbk (fs, ret);
+out:
+ return;
+}
+
+
+int
+glfs_init_common (struct glfs *fs)
+{
+ int ret = -1;
+
+ ret = create_master (fs);
+ if (ret)
+ return ret;
+
+ ret = gf_thread_create (&fs->poller, NULL, glfs_poller, fs);
+ if (ret)
+ return ret;
+
+ ret = glfs_volumes_init (fs);
+ if (ret)
+ return ret;
+
+ fs->dev_id = gf_dm_hashfn (fs->volname, strlen (fs->volname));
+ return ret;
+}
+
+
+int
+glfs_init_async (struct glfs *fs, glfs_init_cbk cbk)
+{
+ int ret = -1;
+
+ fs->init_cbk = cbk;
+
+ ret = glfs_init_common (fs);
+
+ return ret;
+}
+
+
+int
+glfs_init (struct glfs *fs)
+{
+ int ret = -1;
+
+ ret = glfs_init_common (fs);
+ if (ret)
+ return ret;
+
+ ret = glfs_init_wait (fs);
+
+ return ret;
+}
+
+
+int
+glfs_fini (struct glfs *fs)
+{
+ int ret = -1;
+ int countdown = 100;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_pool_t *call_pool = NULL;
+
+ ctx = fs->ctx;
+
+ if (ctx->mgmt) {
+ rpc_clnt_disable (ctx->mgmt);
+ ctx->mgmt = NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ call_pool = fs->ctx->pool;
+
+ while (countdown--) {
+ /* give some time for background frames to finish */
+ if (!call_pool->cnt)
+ break;
+ usleep (100000);
+ }
+ /* leaked frames may exist, we ignore */
+
+ /* We deem glfs_fini as successful if there are no pending frames in
+ the call pool */
+ ret = (call_pool->cnt == 0)? 0: -1;
+
+ subvol = glfs_active_subvol (fs);
+ if (subvol) {
+ /* PARENT_DOWN within glfs_subvol_done() is issued only
+ on graph switch (the new graph should activate and
+ decrement the extra @winds count taken in glfs_graph_setup()).
+
+ Since we are explicitly destroying, PARENT_DOWN is necessary
+ */
+ xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, 0);
+ /* TBD: wait for CHILD_DOWN before exiting, in case of
+ asynchronous cleanup like graceful socket disconnection
+ in the future.
+ */
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ if (ctx->log.logfile)
+ fclose (ctx->log.logfile);
+
+ return ret;
+}
diff --git a/api/src/glfs.h b/api/src/glfs.h
new file mode 100644
index 000000000..18fda496e
--- /dev/null
+++ b/api/src/glfs.h
@@ -0,0 +1,581 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _GLFS_H
+#define _GLFS_H
+
+/*
+ Enforce the following flags as libgfapi is built
+ with them, and we want programs linking against them to also
+ be built with these flags. This is necessary as it affects
+ some of the structures defined in libc headers (like struct stat)
+ and those definitions need to be consistently compiled in
+ both the library and the application.
+*/
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#ifndef __USE_FILE_OFFSET64
+#define __USE_FILE_OFFSET64
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <sys/cdefs.h>
+#include <dirent.h>
+#include <sys/statvfs.h>
+
+__BEGIN_DECLS
+
+/* The filesystem object. One object per 'virtual mount' */
+struct glfs;
+typedef struct glfs glfs_t;
+
+
+/*
+ SYNOPSIS
+
+ glfs_new: Create a new 'virtual mount' object.
+
+ DESCRIPTION
+
+ This is most likely the very first function you will use. This function
+ will create a new glfs_t (virtual mount) object in memory.
+
+ On this newly created glfs_t, you need to either set a volfile path
+ (glfs_set_volfile) or a volfile server (glfs_set_volfile_server).
+
+ The glfs_t object needs to be initialized with glfs_init() before you
+ can start issuing file operations on it.
+
+ PARAMETERS
+
+ @volname: Name of the volume. This identifies the server-side volume and
+ the fetched volfile (equivalent of --volfile-id command line
+ parameter to glusterfsd). When used with glfs_set_volfile() the
+ @volname has no effect (except for appearing in log messages).
+
+ RETURN VALUES
+
+ NULL : Out of memory condition.
+ Others : Pointer to the newly created glfs_t virtual mount object.
+
+*/
+
+glfs_t *glfs_new (const char *volname);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile: Specify the path to the volume specification file.
+
+ DESCRIPTION
+
+ If you are using a static volume specification file (without dynamic
+ volume management abilities from the CLI), then specify the path to
+ the volume specification file.
+
+ This is incompatible with glfs_set_volfile_server().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @volfile: Path to the locally available volume specification file.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_volfile (glfs_t *fs, const char *volfile);
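
As a minimal sketch of the static-volfile mode described above (the volume name and file path are hypothetical), note that the call fails if a volfile or volfile server has already been assigned:

        glfs_t *fs = glfs_new ("testvol");
        if (glfs_set_volfile (fs, "/etc/glusterfs/testvol.vol") != 0) {
                /* a volfile or volfile server was already assigned */
        }
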
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile_server: Specify the address of management server.
+
+ DESCRIPTION
+
+ This function specifies the address of the management server (glusterd)
+ to connect, and establish the volume configuration. The @volname
+ parameter passed to glfs_new() is the volume which will be virtually
+ mounted as the glfs_t object. All operations performed by the CLI at
+ the management server will automatically be reflected in the 'virtual
+ mount' object as it maintains a connection to glusterd and polls on
+ configuration change notifications.
+
+ This is incompatible with glfs_set_volfile().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @transport: String specifying the transport used to connect to the
+ management daemon. Specifying NULL will result in the usage
+ of the default (tcp) transport type. Permitted values
+ are those you would specify as transport-type in a volume
+ specification file (e.g "tcp", "rdma", "unix".)
+
+ @host: String specifying the address of where to find the management
+ daemon. Depending on the transport type this would either be
+ an FQDN (e.g: "storage01.company.com"), ASCII encoded IP
+ address "192.168.22.1", or a UNIX domain socket path (e.g
+ "/tmp/glusterd.socket".)
+
+ @port: The TCP port number where gluster management daemon is listening.
+ Specifying 0 uses the default port number GF_DEFAULT_BASE_PORT.
+ This parameter is unused if you are using a UNIX domain socket.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_volfile_server (glfs_t *fs, const char *transport,
+ const char *host, int port);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_logging: Specify logging parameters.
+
+ DESCRIPTION
+
+ This function specifies logging parameters for the virtual mount.
+ Default log file is /dev/null.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the logging parameters.
+
+ @logfile: The logfile to be used for logging. Will be created if it does not
+ already exist (provided system permissions allow). If NULL, a new
+ logfile will be created in the default log directory associated with
+ the glusterfs installation.
+
+ @loglevel: Numerical value specifying the degree of verbosity. The higher
+ the value, the more verbose the logging.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_logging (glfs_t *fs, const char *logfile, int loglevel);
+
+
+/*
+ SYNOPSIS
+
+ glfs_init: Initialize the 'virtual mount'
+
+ DESCRIPTION
+
+ This function initializes the glfs_t object. This consists of many steps:
+ - Spawn a poll-loop thread.
+ - Establish connection to management daemon and receive volume specification.
+ - Construct translator graph and initialize graph.
+ - Wait for initialization (connecting to all bricks) to complete.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_init (glfs_t *fs);
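
As a minimal end-to-end initialization sketch assembled from the calls documented above (the volume name, server address, port and log path are hypothetical; a port of 0 selects the default):

        glfs_t *fs = glfs_new ("testvol");
        if (!fs)
                return -1;

        glfs_set_volfile_server (fs, "tcp", "server1", 0);
        glfs_set_logging (fs, "/tmp/testvol-api.log", 7);  /* higher = more verbose */

        if (glfs_init (fs) != 0)
                return -1;   /* errno holds the reason */

        /* the virtual mount is now ready for file operations */
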
+
+
+/*
+ SYNOPSIS
+
+ glfs_fini: Cleanup and destroy the 'virtual mount'
+
+ DESCRIPTION
+
+ This function attempts to gracefully destroy the glfs_t object. An attempt
+ is made to wait for all background processing to complete before returning.
+
+ glfs_fini() must be called after all operations on the glfs_t are finished.
+
+ IMPORTANT
+
+ IT IS NECESSARY TO CALL glfs_fini() ON ALL THE INITIALIZED glfs_t
+ OBJECTS BEFORE TERMINATING THE PROGRAM. THERE MAY BE CACHED AND
+ UNWRITTEN / INCOMPLETE OPERATIONS STILL IN PROGRESS EVEN THOUGH THE
+ API CALLS HAVE RETURNED. glfs_fini() WILL WAIT FOR BACKGROUND OPERATIONS
+ TO COMPLETE BEFORE RETURNING, THEREBY MAKING IT SAFE FOR THE PROGRAM TO
+ EXIT.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be destroyed.
+
+ RETURN VALUES
+
+ 0 : Success.
+*/
+
+int glfs_fini (glfs_t *fs);
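
As stressed above, every successfully initialized handle should be torn down once all operations on it have completed, for example:

        /* ... all I/O on "fs" has finished ... */
        glfs_fini (fs);
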
+
+/*
+ * FILE OPERATION
+ *
+ * What follows are filesystem operations performed on the
+ * 'virtual mount'. The calls here are kept as close to
+ * the POSIX system calls as possible.
+ *
+ * Notes:
+ *
+ * - All paths specified, even if absolute, are relative to the
+ * root of the virtual mount and not the system root (/).
+ *
+ */
+
+/* The file descriptor object. One per open file/directory. */
+
+struct glfs_fd;
+typedef struct glfs_fd glfs_fd_t;
+
+/*
+ * PER THREAD IDENTITY MODIFIERS
+ *
+ * The following operations enable setting a per-thread identity context
+ * for the glfs APIs to perform operations as. The calls here are kept as close
+ * to POSIX equivalents as possible.
+ *
+ * NOTES:
+ *
+ * - setgroups is a per-thread setting, hence this is named fsgroups to stay
+ * close in naming to the fs(u/g)id APIs
+ * - The typical mode of operation is to set the IDs as required (optionally
+ * setting the supplementary groups as well), make the glfs call, and after
+ * the glfs operation set them back to the euid/egid or uid/gid appropriate
+ * to the caller
+ * - Once set, the groups need to be unset by setting the size to 0 (in which
+ * case the list argument is ignored)
+ * - Once a thread chooses to set the IDs, all glfs calls made from that
+ * thread default to the IDs set for that thread. Use these APIs with care
+ * and ensure that the set IDs are reverted to the global process defaults
+ * as required.
+ *
+ */
+int glfs_setfsuid (uid_t fsuid);
+int glfs_setfsgid (gid_t fsgid);
+int glfs_setfsgroups (size_t size, const gid_t *list);
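
A hedged sketch of the set/operate/revert pattern described in the notes above (the uid/gid values are illustrative only):

        glfs_setfsuid (1000);
        glfs_setfsgid (1000);
        /* ... glfs calls made from this thread now run as uid/gid 1000 ... */
        glfs_setfsuid (0);
        glfs_setfsgid (0);
        glfs_setfsgroups (0, NULL);   /* size 0 clears any supplementary groups */
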
+
+/*
+ SYNOPSIS
+
+ glfs_open: Open a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @flags: Open flags. See open(2). O_CREAT is not supported.
+ Use glfs_creat() for creating files.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *glfs_open (glfs_t *fs, const char *path, int flags);
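
A brief sketch of opening and reading a file on the virtual mount (the path is hypothetical; glfs_read() and glfs_close() are declared further below):

        glfs_fd_t *fd = glfs_open (fs, "/dir/file.txt", O_RDONLY);
        if (!fd)
                return -1;   /* errno is set */

        char buf[4096];
        ssize_t nread = glfs_read (fd, buf, sizeof (buf), 0);

        glfs_close (fd);
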
+
+
+/*
+ SYNOPSIS
+
+ glfs_creat: Create a file.
+
+ DESCRIPTION
+
+ This function creates and opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @mode: Permission of the file to be created.
+
+ @flags: Create flags. See open(2). O_EXCL is supported.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *glfs_creat (glfs_t *fs, const char *path, int flags,
+ mode_t mode);
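
A short sketch of creating a file with glfs_creat() (the path and mode are illustrative):

        glfs_fd_t *fd = glfs_creat (fs, "/dir/newfile", O_WRONLY | O_EXCL, 0644);
        if (!fd)
                return -1;   /* errno is set, e.g. EEXIST with O_EXCL */
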
+
+int glfs_close (glfs_fd_t *fd);
+
+glfs_t *glfs_from_glfd (glfs_fd_t *fd);
+
+int glfs_set_xlator_option (glfs_t *fs, const char *xlator, const char *key,
+ const char *value);
+
+/*
+
+ glfs_io_cbk
+
+ The following is the function type definition of the callback
+ function pointer which has to be provided by the caller to the
+ *_async() versions of the IO calls.
+
+ The callback function is called on completion of the requested
+ IO, and the appropriate return value is returned in @ret.
+
+ In case of an error in completing the IO, @ret will be -1 and
+ @errno will be set with the appropriate error.
+
+ @ret will be same as the return value of the non _async() variant
+ of the particular call
+
+ @data is the same context pointer provided by the caller at the
+ time of issuing the async IO call. This can be used by the
+ caller to differentiate different instances of the async requests
+ in a common callback function.
+*/
+
+typedef void (*glfs_io_cbk) (glfs_fd_t *fd, ssize_t ret, void *data);
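
An illustrative sketch of the callback signature above as it might be used with one of the *_async() calls declared below (the callback and buffer names are hypothetical):

        static void
        read_done (glfs_fd_t *fd, ssize_t ret, void *data)
        {
                if (ret < 0) {
                        /* errno describes the failure */
                }
                /* "data" is the context pointer passed at submission time */
        }

        /* submission: glfs_read_async (fd, buf, sizeof (buf), 0, read_done, buf); */
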
+
+// glfs_{read,write}[_async]
+
+ssize_t glfs_read (glfs_fd_t *fd, void *buf, size_t count, int flags);
+ssize_t glfs_write (glfs_fd_t *fd, const void *buf, size_t count, int flags);
+int glfs_read_async (glfs_fd_t *fd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data);
+int glfs_write_async (glfs_fd_t *fd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data);
+
+// glfs_{read,write}v[_async]
+
+ssize_t glfs_readv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags);
+ssize_t glfs_writev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags);
+int glfs_readv_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data);
+int glfs_writev_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data);
+
+// glfs_p{read,write}[_async]
+
+ssize_t glfs_pread (glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags);
+ssize_t glfs_pwrite (glfs_fd_t *fd, const void *buf, size_t count,
+ off_t offset, int flags);
+int glfs_pread_async (glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data);
+int glfs_pwrite_async (glfs_fd_t *fd, const void *buf, int count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data);
+
+// glfs_p{read,write}v[_async]
+
+ssize_t glfs_preadv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ off_t offset, int flags);
+ssize_t glfs_pwritev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ off_t offset, int flags);
+int glfs_preadv_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data);
+int glfs_pwritev_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data);
+
+
+off_t glfs_lseek (glfs_fd_t *fd, off_t offset, int whence);
+
+int glfs_truncate (glfs_t *fs, const char *path, off_t length);
+
+int glfs_ftruncate (glfs_fd_t *fd, off_t length);
+int glfs_ftruncate_async (glfs_fd_t *fd, off_t length, glfs_io_cbk fn,
+ void *data);
+
+int glfs_lstat (glfs_t *fs, const char *path, struct stat *buf);
+int glfs_stat (glfs_t *fs, const char *path, struct stat *buf);
+int glfs_fstat (glfs_fd_t *fd, struct stat *buf);
+
+int glfs_fsync (glfs_fd_t *fd);
+int glfs_fsync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data);
+
+int glfs_fdatasync (glfs_fd_t *fd);
+int glfs_fdatasync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data);
+
+int glfs_access (glfs_t *fs, const char *path, int mode);
+
+int glfs_symlink (glfs_t *fs, const char *oldpath, const char *newpath);
+
+int glfs_readlink (glfs_t *fs, const char *path, char *buf, size_t bufsiz);
+
+int glfs_mknod (glfs_t *fs, const char *path, mode_t mode, dev_t dev);
+
+int glfs_mkdir (glfs_t *fs, const char *path, mode_t mode);
+
+int glfs_unlink (glfs_t *fs, const char *path);
+
+int glfs_rmdir (glfs_t *fs, const char *path);
+
+int glfs_rename (glfs_t *fs, const char *oldpath, const char *newpath);
+
+int glfs_link (glfs_t *fs, const char *oldpath, const char *newpath);
+
+glfs_fd_t *glfs_opendir (glfs_t *fs, const char *path);
+
+/*
+ * @glfs_readdir_r and @glfs_readdirplus_r ARE thread safe AND re-entrant,
+ * but the interface has ambiguity about the size of @dirent to be allocated
+ * before calling the APIs. A 512-byte buffer (for @dirent) is sufficient for
+ * all known systems tested against glusterfs/gfapi, but may be
+ * insufficient in the future.
+ */
+
+int glfs_readdir_r (glfs_fd_t *fd, struct dirent *dirent,
+ struct dirent **result);
+
+int glfs_readdirplus_r (glfs_fd_t *fd, struct stat *stat, struct dirent *dirent,
+ struct dirent **result);
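
A hedged sketch of the re-entrant readdir variant using the 512-byte buffer suggested in the note above (the directory path is illustrative):

        union {
                struct dirent d;
                char pad[512];   /* per the note above, 512 bytes suffices */
        } entry;
        struct dirent *result = NULL;

        glfs_fd_t *dfd = glfs_opendir (fs, "/dir");
        while (glfs_readdir_r (dfd, &entry.d, &result) == 0 && result != NULL) {
                /* result->d_name holds the entry name */
        }
        glfs_closedir (dfd);
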
+
+/*
+ * @glfs_readdir and @glfs_readdirplus are NEITHER thread safe NOR re-entrant
+ * when called on the same directory handle. However they ARE thread safe
+ * AND re-entrant when called on different directory handles (which may be
+ * referring to the same directory too.)
+ */
+
+struct dirent *glfs_readdir (glfs_fd_t *fd);
+
+struct dirent *glfs_readdirplus (glfs_fd_t *fd, struct stat *stat);
+
+long glfs_telldir (glfs_fd_t *fd);
+
+void glfs_seekdir (glfs_fd_t *fd, long offset);
+
+int glfs_closedir (glfs_fd_t *fd);
+
+int glfs_statvfs (glfs_t *fs, const char *path, struct statvfs *buf);
+
+int glfs_chmod (glfs_t *fs, const char *path, mode_t mode);
+
+int glfs_fchmod (glfs_fd_t *fd, mode_t mode);
+
+int glfs_chown (glfs_t *fs, const char *path, uid_t uid, gid_t gid);
+
+int glfs_lchown (glfs_t *fs, const char *path, uid_t uid, gid_t gid);
+
+int glfs_fchown (glfs_fd_t *fd, uid_t uid, gid_t gid);
+
+int glfs_utimens (glfs_t *fs, const char *path, struct timespec times[2]);
+
+int glfs_lutimens (glfs_t *fs, const char *path, struct timespec times[2]);
+
+int glfs_futimens (glfs_fd_t *fd, struct timespec times[2]);
+
+ssize_t glfs_getxattr (glfs_t *fs, const char *path, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_lgetxattr (glfs_t *fs, const char *path, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_fgetxattr (glfs_fd_t *fd, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_listxattr (glfs_t *fs, const char *path, void *value, size_t size);
+
+ssize_t glfs_llistxattr (glfs_t *fs, const char *path, void *value,
+ size_t size);
+
+ssize_t glfs_flistxattr (glfs_fd_t *fd, void *value, size_t size);
+
+int glfs_setxattr (glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_lsetxattr (glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_fsetxattr (glfs_fd_t *fd, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_removexattr (glfs_t *fs, const char *path, const char *name);
+
+int glfs_lremovexattr (glfs_t *fs, const char *path, const char *name);
+
+int glfs_fremovexattr (glfs_fd_t *fd, const char *name);
+
+int glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len);
+
+int glfs_discard(glfs_fd_t *fd, off_t offset, size_t len);
+
+
+int glfs_discard_async (glfs_fd_t *fd, off_t length, size_t len,
+ glfs_io_cbk fn, void *data);
+
+int glfs_zerofill(glfs_fd_t *fd, off_t offset, size_t len);
+
+int glfs_zerofill_async (glfs_fd_t *fd, off_t length, size_t len,
+ glfs_io_cbk fn, void *data);
+
+char *glfs_getcwd (glfs_t *fs, char *buf, size_t size);
+
+int glfs_chdir (glfs_t *fs, const char *path);
+
+int glfs_fchdir (glfs_fd_t *fd);
+
+char *glfs_realpath (glfs_t *fs, const char *path, char *resolved_path);
+
+/*
+ * @cmd and @flock are as specified in man fcntl(2).
+ */
+int glfs_posix_lock (glfs_fd_t *fd, int cmd, struct flock *flock);
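
A brief sketch of advisory locking through glfs_posix_lock(), mirroring fcntl(2) (the byte range and lock type are illustrative):

        struct flock lock = {0, };
        lock.l_type   = F_WRLCK;
        lock.l_whence = SEEK_SET;
        lock.l_start  = 0;
        lock.l_len    = 0;   /* 0 = lock the whole file */

        if (glfs_posix_lock (fd, F_SETLK, &lock) != 0) {
                /* errno is set, e.g. EAGAIN if the lock is held elsewhere */
        }
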
+
+glfs_fd_t *glfs_dup (glfs_fd_t *fd);
+
+__END_DECLS
+
+#endif /* !_GLFS_H */
diff --git a/argp-standalone/configure.ac b/argp-standalone/configure.ac
index 65ebc4518..2ecd2a801 100644
--- a/argp-standalone/configure.ac
+++ b/argp-standalone/configure.ac
@@ -8,7 +8,7 @@ AC_CONFIG_SRCDIR([argp-ba.c])
AC_CONFIG_AUX_DIR([.])
AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER(config.h)
+AC_CONFIG_HEADERS(config.h)
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
@@ -22,7 +22,7 @@ AC_GNU_SOURCE
AC_PROG_CC
AC_PROG_MAKE_SET
AC_PROG_RANLIB
-AM_PROG_CC_STDC
+AC_PROG_CC
if test "x$am_cv_prog_cc_stdc" = xno ; then
AC_ERROR([the C compiler doesn't handle ANSI-C])
diff --git a/autogen.sh b/autogen.sh
index e20408bf2..f937e6be0 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,8 +1,105 @@
#!/bin/sh
-aclocal
-autoheader
-(libtoolize --automake --copy --force || glibtoolize --automake --copy --force)
-autoconf
-automake --add-missing --copy --foreign
+echo
+echo ... GlusterFS autogen ...
+echo
+
+## Check all dependencies are present
+MISSING=""
+
+# Check for aclocal
+env aclocal --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ ACLOCAL=aclocal
+else
+ MISSING="$MISSING aclocal"
+fi
+
+# Check for autoconf
+env autoconf --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOCONF=autoconf
+else
+ MISSING="$MISSING autoconf"
+fi
+
+# Check for autoheader
+env autoheader --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOHEADER=autoheader
+else
+ MISSING="$MISSING autoheader"
+fi
+
+# Check for automake
+env automake --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOMAKE=automake
+else
+ MISSING="$MISSING automake"
+fi
+
+# Check for libtoolize or glibtoolize
+env libtoolize --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ # libtoolize was found, so use it
+ TOOL=libtoolize
+else
+ # libtoolize wasn't found, so check for glibtoolize
+ env glibtoolize --version > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ TOOL=glibtoolize
+ else
+ MISSING="$MISSING libtoolize/glibtoolize"
+ fi
+fi
+
+# Check for tar
+env tar --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ MISSING="$MISSING tar"
+fi
+
+## If dependencies are missing, warn the user and abort
+if [ "x$MISSING" != "x" ]; then
+ echo "Aborting."
+ echo
+ echo "The following build tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ echo "Please install them and try again."
+ echo
+ exit 1
+fi
+
+## generate gf-error-codes.h from error-codes.json
+echo "Generate gf-error-codes.h ..."
+if ./gen-headers.py; then
+ if ! mv -fv gf-error-codes.h libglusterfs/src/gf-error-codes.h; then
+ exit 1
+ fi
+else
+ exit 1
+fi
+
+## Do the autogeneration
+echo Running ${ACLOCAL}...
+$ACLOCAL -I ./contrib/aclocal
+echo Running ${AUTOHEADER}...
+$AUTOHEADER
+echo Running ${TOOL}...
+$TOOL --automake --copy --force
+echo Running ${AUTOCONF}...
+$AUTOCONF
+echo Running ${AUTOMAKE}...
+$AUTOMAKE --add-missing --copy --foreign
+
+# Run autogen in the argp-standalone sub-directory
cd argp-standalone;./autogen.sh
+
+# Instruct user on next steps
+echo
+echo "Please proceed with configuring, compiling, and installing."
diff --git a/booster/Makefile.am b/booster/Makefile.am
deleted file mode 100644
index e1c45f305..000000000
--- a/booster/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS=src \ No newline at end of file
diff --git a/booster/src/Makefile.am b/booster/src/Makefile.am
deleted file mode 100644
index d7d83abf5..000000000
--- a/booster/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-ldpreload_LTLIBRARIES = libglusterfs-booster.la
-ldpreloaddir = $(libdir)/glusterfs
-noinst_HEADERS = booster_fstab.h booster-fd.h
-libglusterfs_booster_la_SOURCES = booster.c booster_stat.c booster_fstab.c booster-fd.c
-libglusterfs_booster_la_CFLAGS = -I$(top_srcdir)/libglusterfsclient/src/ -D_GNU_SOURCE -D$(GF_HOST_OS) -fPIC -Wall \
- -pthread $(GF_BOOSTER_CFLAGS) -shared -nostartfiles
-libglusterfs_booster_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -I$(top_srcdir)/libglusterfsclient/src \
- -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(ARGP_STANDALONE_CPPFLAGS)
-
-libglusterfs_booster_la_LDFLAGS = -module -avoidversion
-libglusterfs_booster_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
-
-CLEANFILES =
-
-uninstall-local:
- rm -f $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
-
-install-data-hook:
- ln -sf libglusterfs-booster.so $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
diff --git a/booster/src/booster-fd.c b/booster/src/booster-fd.c
deleted file mode 100644
index fa5b0cde2..000000000
--- a/booster/src/booster-fd.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-#include "booster-fd.h"
-#include <logging.h>
-#include <mem-pool.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <common-utils.h>
-#include <string.h>
-
-#include <assert.h>
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-/*
- Allocate in memory chunks of power of 2 starting from 1024B
- Assumes fdtable->lock is held
- */
-static inline uint
-gf_roundup_power_of_two (uint nr)
-{
- uint result = 1;
-
- if (nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Negative number passed");
- return -1;
- }
-
- while (result <= nr)
- result *= 2;
-
- return result;
-}
-
-#define BOOSTER_NFDBITS (sizeof (unsigned long))
-
-#define BOOSTER_FDMASK(d) (1UL << ((d) % BOOSTER_NFDBITS))
-#define BOOSTER_FDELT(d) (d / BOOSTER_NFDBITS)
-#define BOOSTER_FD_SET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] |= BOOSTER_FDMASK(d))
-#define BOOSTER_FD_CLR(set, d) (set->fd_bits[BOOSTER_FDELT(d)] &= ~BOOSTER_FDMASK(d))
-#define BOOSTER_FD_ISSET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] & BOOSTER_FDMASK(d))
-
-inline int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- return BOOSTER_FD_ISSET(fdtable->close_on_exec, fd);
-}
-
-inline void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- BOOSTER_FD_SET(fdtable->close_on_exec, fd);
-}
-
-int
-booster_fdtable_expand (booster_fdtable_t *fdtable, uint nr)
-{
- fd_t **oldfds = NULL, **tmp = NULL;
- uint oldmax_fds = -1;
- uint cpy = 0;
- int32_t ret = -1, bytes = 0;
- booster_fd_set_t *oldclose_on_exec = NULL;
-
- if (fdtable == NULL || nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Invalid argument");
- errno = EINVAL;
- ret = -1;
- goto out;
- }
-
- nr /= (1024 / sizeof (fd_t *));
- nr = gf_roundup_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fd_t *));
-
- oldfds = fdtable->fds;
- oldmax_fds = fdtable->max_fds;
- oldclose_on_exec = fdtable->close_on_exec;
-
- fdtable->fds = CALLOC (nr, sizeof (fd_t *));
- if (fdtable->fds == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation failed");
- fdtable->fds = oldfds;
- oldfds = NULL;
- ret = -1;
- goto out;
- }
-
- fdtable->max_fds = nr;
-
- if (oldfds) {
- cpy = oldmax_fds * sizeof (fd_t *);
- memcpy (fdtable->fds, oldfds, cpy);
- }
-
- /* nr will be either less than 8 or a multiple of 8 */
- bytes = nr/8;
- bytes = bytes ? bytes : 1;
- fdtable->close_on_exec = CALLOC (bytes, 1);
- if (fdtable->close_on_exec == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation "
- "failed");
- tmp = fdtable->fds;
- fdtable->fds = oldfds;
- oldfds = tmp;
- ret = -1;
- goto out;
- }
-
- if (oldclose_on_exec != NULL) {
- bytes = oldmax_fds/8;
- cpy = bytes ? bytes : 1;
- memcpy (fdtable->close_on_exec, oldclose_on_exec, cpy);
- }
- gf_log ("booster-fd", GF_LOG_TRACE, "FD-table expanded: Old: %d,New: %d"
- , oldmax_fds, nr);
- ret = 0;
-
-out:
- FREE (oldfds);
- FREE (oldclose_on_exec);
-
- return ret;
-}
-
-booster_fdtable_t *
-booster_fdtable_alloc (void)
-{
- booster_fdtable_t *fdtable = NULL;
- int32_t ret = -1;
-
- fdtable = CALLOC (1, sizeof (*fdtable));
- GF_VALIDATE_OR_GOTO ("booster-fd", fdtable, out);
-
- LOCK_INIT (&fdtable->lock);
-
- LOCK (&fdtable->lock);
- {
- ret = booster_fdtable_expand (fdtable, 0);
- }
- UNLOCK (&fdtable->lock);
-
- if (ret == -1) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD-table allocation "
- "failed");
- FREE (fdtable);
- fdtable = NULL;
- }
-
-out:
- return fdtable;
-}
-
-fd_t **
-__booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
-
- if (count == NULL)
- goto out;
-
- fds = fdtable->fds;
- fdtable->fds = calloc (fdtable->max_fds, sizeof (fd_t *));
- *count = fdtable->max_fds;
-
-out:
- return fds;
-}
-
-fd_t **
-booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
- if (!fdtable)
- return NULL;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, count);
- }
- UNLOCK (&fdtable->lock);
-
- return fds;
-}
-
-void
-booster_fdtable_destroy (booster_fdtable_t *fdtable)
-{
- fd_t *fd = NULL;
- fd_t **fds = NULL;
- uint fd_count = 0;
- int i = 0;
-
- if (!fdtable)
- return;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, &fd_count);
- FREE (fdtable->fds);
- }
- UNLOCK (&fdtable->lock);
-
- if (!fds)
- goto free_table;
-
- for (i = 0; i < fd_count; i++) {
- fd = fds[i];
- if (fd != NULL)
- fd_unref (fd);
- }
- FREE (fds);
-free_table:
- LOCK_DESTROY (&fdtable->lock);
- FREE (fdtable);
-}
-
-int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd)
-{
- int ret = -1;
- int error = 0;
-
- if (fdtable == NULL || fdptr == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return -1;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "Requested fd: %d", fd);
- LOCK (&fdtable->lock);
- {
- while (fdtable->max_fds < fd) {
- error = 0;
- error = booster_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("booster-fd", GF_LOG_ERROR,
- "Cannot expand fdtable:%s",
- strerror (error));
- goto err;
- }
- }
-
- if (!fdtable->fds[fd]) {
- fdtable->fds[fd] = fdptr;
- fd_ref (fdptr);
- ret = fd;
- } else
- gf_log ("booster-fd", GF_LOG_ERROR, "Cannot allocate fd"
- " %d (slot not empty in fdtable)", fd);
- }
-err:
- UNLOCK (&fdtable->lock);
-
- return ret;
-}
-
-void
-booster_fd_put (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD put: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- return;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- fdtable->fds[fd] = NULL;
- }
- UNLOCK (&fdtable->lock);
-
- if (fdptr)
- fd_unref (fdptr);
-}
-
-fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
-
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD ptr request: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- errno = EINVAL;
- return NULL;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- if (fdptr)
- fd_ref (fdptr);
- }
- UNLOCK (&fdtable->lock);
-
- return fdptr;
-}
-
-void
-booster_fdptr_put (fd_t *booster_fd)
-{
- if (booster_fd)
- fd_unref (booster_fd);
-}
diff --git a/booster/src/booster-fd.h b/booster/src/booster-fd.h
deleted file mode 100644
index 595a112bd..000000000
--- a/booster/src/booster-fd.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _BOOSTER_FD_H
-#define _BOOSTER_FD_H
-
-#include <libglusterfsclient.h>
-#include <locking.h>
-#include <list.h>
-
-/* This struct must be updated if the fd_t in fd.h changes.
- * We cannot include those headers here because unistd.h, included
- * by glusterfs headers, conflicts with the syscall prototypes we
- * define for booster.
- */
-struct _fd {
- pid_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- struct _dict *ctx;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
-};
-typedef struct _fd fd_t;
-
-struct _booster_fd_set {
- unsigned long fd_bits[0];
-};
-typedef struct _booster_fd_set booster_fd_set_t;
-
-struct _booster_fdtable {
- booster_fd_set_t *close_on_exec;
- int refcount;
- unsigned int max_fds;
- gf_lock_t lock;
- fd_t **fds;
-};
-typedef struct _booster_fdtable booster_fdtable_t;
-
-void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-extern int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd);
-
-extern void
-booster_fd_put (booster_fdtable_t *fdtable, int fd);
-
-extern fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd);
-
-extern void
-booster_fdptr_put (fd_t *fdptr);
-
-extern void
-booster_fdtable_destroy (booster_fdtable_t *fdtable);
-
-booster_fdtable_t *
-booster_fdtable_alloc (void);
-
-#endif /* #ifndef _BOOSTER_FD_H */
diff --git a/booster/src/booster.c b/booster/src/booster.c
deleted file mode 100644
index c34ec1146..000000000
--- a/booster/src/booster.c
+++ /dev/null
@@ -1,3172 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <libglusterfsclient.h>
-#include <list.h>
-#include <pthread.h>
-#include <sys/xattr.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-#include <ctype.h>
-#include <logging.h>
-#include <utime.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <fcntl.h>
-#include "booster-fd.h"
-
-#ifndef GF_UNIT_KB
-#define GF_UNIT_KB 1024
-#endif
-
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-/* The constructor attribute registers this function with libc's
- * _init machinery, so that it is run before the program's main().
- */
-static void booster_lib_init (void) __attribute__((constructor));
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-
-extern int pipe (int filedes[2]);
-/* We define these flags so that we can remove fcntl.h from the include path.
- * fcntl.h has certain defines and other lines of code that redirect the
- * application's open and open64 calls to the syscalls defined by
- * libc; for us, that's not a Good Thing (TM).
- */
-#ifndef GF_O_CREAT
-#define GF_O_CREAT 0x40
-#endif
-
-#ifndef GF_O_TRUNC
-#define GF_O_TRUNC 0x200
-#endif
-
-#ifndef GF_O_RDWR
-#define GF_O_RDWR 0x2
-#endif
-
-#ifndef GF_O_WRONLY
-#define GF_O_WRONLY 0x1
-#endif
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
-typedef enum {
- BOOSTER_OPEN,
- BOOSTER_CREAT
-} booster_op_t;
-
-struct _inode;
-struct _dict;
-
-ssize_t
-write (int fd, const void *buf, size_t count);
-
-/* open, open64, creat */
-static int (*real_open) (const char *pathname, int flags, ...);
-static int (*real_open64) (const char *pathname, int flags, ...);
-static int (*real_creat) (const char *pathname, mode_t mode);
-static int (*real_creat64) (const char *pathname, mode_t mode);
-
-/* read, readv, pread, pread64 */
-static ssize_t (*real_read) (int fd, void *buf, size_t count);
-static ssize_t (*real_readv) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pread) (int fd, void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pread64) (int fd, void *buf, size_t count,
- uint64_t offset);
-
-/* write, writev, pwrite, pwrite64 */
-static ssize_t (*real_write) (int fd, const void *buf, size_t count);
-static ssize_t (*real_writev) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pwrite) (int fd, const void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pwrite64) (int fd, const void *buf, size_t count,
- uint64_t offset);
-
-/* lseek, llseek, lseek64 */
-static off_t (*real_lseek) (int fildes, unsigned long offset, int whence);
-static off_t (*real_lseek64) (int fildes, uint64_t offset, int whence);
-
-/* close */
-static int (*real_close) (int fd);
-
-/* dup dup2 */
-static int (*real_dup) (int fd);
-static int (*real_dup2) (int oldfd, int newfd);
-
-static pid_t (*real_fork) (void);
-static int (*real_mkdir) (const char *pathname, mode_t mode);
-static int (*real_rmdir) (const char *pathname);
-static int (*real_chmod) (const char *pathname, mode_t mode);
-static int (*real_chown) (const char *pathname, uid_t owner, gid_t group);
-static int (*real_fchmod) (int fd, mode_t mode);
-static int (*real_fchown) (int fd, uid_t owner, gid_t gid);
-static int (*real_fsync) (int fd);
-static int (*real_ftruncate) (int fd, off_t length);
-static int (*real_ftruncate64) (int fd, loff_t length);
-static int (*real_link) (const char *oldpath, const char *newname);
-static int (*real_rename) (const char *oldpath, const char *newpath);
-static int (*real_utimes) (const char *path, const struct timeval times[2]);
-static int (*real_utime) (const char *path, const struct utimbuf *buf);
-static int (*real_mknod) (const char *path, mode_t mode, dev_t dev);
-static int (*real_mkfifo) (const char *path, mode_t mode);
-static int (*real_unlink) (const char *path);
-static int (*real_symlink) (const char *oldpath, const char *newpath);
-static int (*real_readlink) (const char *path, char *buf, size_t bufsize);
-static char * (*real_realpath) (const char *path, char *resolved);
-static DIR * (*real_opendir) (const char *path);
-static struct dirent * (*real_readdir) (DIR *dir);
-static struct dirent64 * (*real_readdir64) (DIR *dir);
-static int (*real_readdir_r) (DIR *dir, struct dirent *entry,
- struct dirent **result);
-static int (*real_readdir64_r) (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result);
-static int (*real_closedir) (DIR *dh);
-static int (*real___xstat) (int ver, const char *path, struct stat *buf);
-static int (*real___xstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_stat) (const char *path, struct stat *buf);
-static int (*real_stat64) (const char *path, struct stat64 *buf);
-static int (*real___fxstat) (int ver, int fd, struct stat *buf);
-static int (*real___fxstat64) (int ver, int fd, struct stat64 *buf);
-static int (*real_fstat) (int fd, struct stat *buf);
-static int (*real_fstat64) (int fd, struct stat64 *buf);
-static int (*real___lxstat) (int ver, const char *path, struct stat *buf);
-static int (*real___lxstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_lstat) (const char *path, struct stat *buf);
-static int (*real_lstat64) (const char *path, struct stat64 *buf);
-static int (*real_statfs) (const char *path, struct statfs *buf);
-static int (*real_statfs64) (const char *path, struct statfs64 *buf);
-static int (*real_statvfs) (const char *path, struct statvfs *buf);
-static int (*real_statvfs64) (const char *path, struct statvfs64 *buf);
-static ssize_t (*real_getxattr) (const char *path, const char *name,
- void *value, size_t size);
-static ssize_t (*real_lgetxattr) (const char *path, const char *name,
- void *value, size_t size);
-static int (*real_remove) (const char* path);
-static int (*real_lchown) (const char *path, uid_t owner, gid_t group);
-static void (*real_rewinddir) (DIR *dirp);
-static void (*real_seekdir) (DIR *dirp, off_t offset);
-static off_t (*real_telldir) (DIR *dirp);
-
-static ssize_t (*real_sendfile) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static ssize_t (*real_sendfile64) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static int (*real_fcntl) (int fd, int cmd, ...);
-static int (*real_chdir) (const char *path);
-static int (*real_fchdir) (int fd);
-static char * (*real_getcwd) (char *buf, size_t size);
-static int (*real_truncate) (const char *path, off_t length);
-static int (*real_truncate64) (const char *path, loff_t length);
-static int (*real_setxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_lsetxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_fsetxattr) (int filedes, const char *name,
- const void *value, size_t size, int flags);
-
-
-#define RESOLVE(sym) do { \
- if (!real_##sym) \
- real_##sym = dlsym (RTLD_NEXT, #sym); \
- } while (0)
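The RESOLVE macro above is the standard dlsym interposition idiom: ask the dynamic linker for the next definition of the symbol in link order (normally libc's) and cache it, so the wrapper can forward calls it does not want to handle itself. A stand-alone sketch of that idiom follows; it is not booster code and interposes unlink purely as an example.

    #define _GNU_SOURCE             /* for RTLD_NEXT */
    #include <dlfcn.h>
    #include <errno.h>

    static int (*next_unlink) (const char *path);

    /* Illustrative interposer: resolve libc's unlink lazily, then forward. */
    int
    unlink (const char *path)
    {
            if (!next_unlink)
                    next_unlink = dlsym (RTLD_NEXT, "unlink");

            /* ... a real interposer would consult its own state here ... */

            if (!next_unlink) {
                    errno = ENOSYS; /* symbol not found; nothing to forward to */
                    return -1;
            }

            return next_unlink (path);
    }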
-
-/*TODO: set proper value */
-#define MOUNT_HASH_SIZE 256
-
-struct booster_mount {
- dev_t st_dev;
- glusterfs_handle_t handle;
- struct list_head device_list;
-};
-typedef struct booster_mount booster_mount_t;
-
-static booster_fdtable_t *booster_fdtable = NULL;
-
-extern int booster_configure (char *confpath);
-/* This is dup'ed every time VMP open/creat wants a new fd.
- * This is needed so we occupy an entry in the process' file
- * table.
- */
-int process_piped_fd = -1;
-
-static int
-booster_get_process_fd ()
-{
- return real_dup (process_piped_fd);
-}
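The comment above process_piped_fd is the key to booster_get_process_fd: virtual-mount-point opens never create a kernel file-table entry of their own, so booster reserves a genuine fd number by dup'ing the read end of a throw-away pipe and then maps that number to the glusterfs handle. A stand-alone sketch of the reservation trick (reserve_fd_number and reserve_source_fd are illustrative names, not booster's):

    #include <unistd.h>

    static int reserve_source_fd = -1;

    /* Return a fresh fd number that the kernel considers in use, without
     * opening any real file: dup() the read end of a pipe created once. */
    static int
    reserve_fd_number (void)
    {
            int pipefd[2];

            if (reserve_source_fd == -1) {
                    if (pipe (pipefd) == -1)
                            return -1;
                    reserve_source_fd = pipefd[0];
                    close (pipefd[1]);      /* the write end is never used */
            }

            return dup (reserve_source_fd);
    }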
-
-/* The following two define which file contains
- * the FSTAB configuration for VMP-based usage.
- */
-#define DEFAULT_BOOSTER_CONF CONFDIR"/booster.conf"
-#define BOOSTER_CONF_ENV_VAR "GLUSTERFS_BOOSTER_FSTAB"
-
-
-/* The following define which log file is used when
- * using the old mount point bypass approach.
- */
-#define BOOSTER_DEFAULT_LOG CONFDIR"/booster.log"
-#define BOOSTER_LOG_ENV_VAR "GLUSTERFS_BOOSTER_LOG"
-
-void
-do_open (int fd, const char *pathname, int flags, mode_t mode, booster_op_t op)
-{
- char *specfile = NULL;
- char *mount_point = NULL;
- int32_t size = 0;
- int32_t ret = -1;
- FILE *specfp = NULL;
- glusterfs_file_t fh = NULL;
- char *logfile = NULL;
- glusterfs_init_params_t iparams = {
- .loglevel = "error",
- .lookup_timeout = 600,
- .stat_timeout = 600,
- };
-
- gf_log ("booster", GF_LOG_DEBUG, "Opening using MPB: %s", pathname);
- size = fgetxattr (fd, "user.glusterfs-booster-volfile", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfile = calloc (1, size);
- if (!specfile) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-volfile", specfile,
- size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfp = tmpfile ();
- if (!specfp) {
- gf_log ("booster", GF_LOG_ERROR, "Temp file creation failed"
- ": %s", strerror (errno));
- goto out;
- }
-
- ret = fwrite (specfile, size, 1, specfp);
- if (ret != 1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to write volfile: %s",
- strerror (errno));
- goto out;
- }
-
- fseek (specfp, 0L, SEEK_SET);
-
- size = fgetxattr (fd, "user.glusterfs-booster-mount", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- mount_point = calloc (size, sizeof (char));
- if (!mount_point) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-mount", mount_point, size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- logfile = getenv (BOOSTER_LOG_ENV_VAR);
- if (logfile) {
- if (strlen (logfile) > 0)
- iparams.logfile = strdup (logfile);
- else
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- } else {
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Using log-file: %s", iparams.logfile);
- iparams.specfp = specfp;
-
- ret = glusterfs_mount (mount_point, &iparams);
- if (ret == -1) {
- if (errno != EEXIST) {
- gf_log ("booster", GF_LOG_ERROR, "Mount failed over"
- " glusterfs");
- goto out;
- } else
- gf_log ("booster", GF_LOG_ERROR, "Already mounted");
- }
-
- switch (op) {
- case BOOSTER_OPEN:
- gf_log ("booster", GF_LOG_TRACE, "Booster open call");
- fh = glusterfs_open (pathname, flags, mode);
- break;
-
- case BOOSTER_CREAT:
- gf_log ("booster", GF_LOG_TRACE, "Booster create call");
- fh = glusterfs_creat (pathname, mode);
- break;
- }
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Error performing operation");
- goto out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to get unused FD");
- goto out;
- }
- fh = NULL;
-
-out:
- if (specfile) {
- free (specfile);
- }
-
- if (specfp) {
- fclose (specfp);
- }
-
- if (mount_point) {
- free (mount_point);
- }
-
- if (fh) {
- glusterfs_close (fh);
- }
-
- return;
-}
-
-int
-vmp_open (const char *pathname, int flags, ...)
-{
- mode_t mode = 0;
- int fd = -1;
- glusterfs_file_t fh = NULL;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- fh = glusterfs_open (pathname, flags, mode);
- }
- else
- fh = glusterfs_open (pathname, flags);
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "VMP open failed");
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create open fd");
- goto fh_close_out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map fd into table");
- goto realfd_close_out;
- }
-
- return fd;
-
-realfd_close_out:
- real_close (fd);
- fd = -1;
-
-fh_close_out:
- glusterfs_close (fh);
-
-out:
- return fd;
-}
-
-#define BOOSTER_USE_OPEN64 1
-#define BOOSTER_DONT_USE_OPEN64 0
-
-int
-booster_open (const char *pathname, int use64, int flags, ...)
-{
- int ret = -1;
- mode_t mode = 0;
- va_list ap;
- int (*my_open) (const char *pathname, int flags, ...);
-
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Open: %s", pathname);
- /* First try opening through the virtual mount point.
- * The difference lies in the fact that:
- * 1. We depend on libglusterfsclient library to perform
- * the translation from the path to handle.
- * 2. We do not go to the file system for the fd, instead
- * we use booster_get_process_fd (), which returns a dup'ed
- * fd of a pipe created in booster_init.
- */
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- ret = vmp_open (pathname, flags, mode);
- }
- else
- ret = vmp_open (pathname, flags);
-
- /* We receive an ENODEV if the VMP does not exist. If we
- * receive an error other than ENODEV, it means, there
- * actually was an error performing vmp_open. This must
- * be returned to the user.
- */
- if ((ret < 0) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Error in opening file over "
- " VMP: %s", strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File opened");
- goto out;
- }
-
- if (use64) {
- gf_log ("booster", GF_LOG_TRACE, "Using 64-bit open");
- my_open = real_open64;
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Using 32-bit open");
- my_open = real_open;
- }
-
- /* It is possible that the RESOLVE macro was not able
- * to resolve the symbol of a function; in that case we
- * don't want to segfault by calling a NULL function pointer.
- */
- if (my_open == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "open not resolved");
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = my_open (pathname, flags, mode);
- } else
- ret = my_open (pathname, flags);
-
- if (ret != -1) {
- do_open (ret, pathname, flags, mode, BOOSTER_OPEN);
- }
-
-out:
- return ret;
-}
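The ENODEV convention described in the comments above is repeated by every path-based wrapper further down (mkdir, rmdir, chmod, and so on). A condensed, hypothetical rendering of that shape; try_gluster_then_libc and its two callbacks are illustrative names, not booster symbols.

    #include <errno.h>
    #include <stddef.h>

    /* ENODEV from libglusterfsclient means "this path is not under a
     * virtual mount point"; only that error falls through to libc. */
    static int
    try_gluster_then_libc (int (*gluster_call) (const char *),
                           int (*libc_call) (const char *),
                           const char *path)
    {
            int ret = gluster_call (path);

            if (ret == 0)
                    return ret;             /* handled by glusterfs */

            if (errno != ENODEV)
                    return ret;             /* real error on a VMP path */

            if (libc_call == NULL) {        /* RESOLVE never found the symbol */
                    errno = ENOSYS;
                    return -1;
            }

            return libc_call (path);
    }

rmdir () below, for instance, is exactly this shape with glusterfs_rmdir and real_rmdir plugged in.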
-
-/* This is done to overwrite the existing definitions of open and open64
- * inside libc with our own copies. __REDIRECT is provided by glibc.
- *
- * XXX: This will not work anywhere other than glibc-based systems.
- */
-int __REDIRECT (booster_false_open, (__const char *__file, int __oflag, ...),
- open) __nonnull ((1));
-int __REDIRECT (booster_false_open64, (__const char *__file, int __oflag, ...),
- open64) __nonnull ((1));
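For readers unfamiliar with __REDIRECT: it is a glibc macro (from <sys/cdefs.h>) that declares a function under one C-level name while pinning its assembler symbol to another, so defining the private name emits the public symbol. A minimal stand-alone sketch of the same trick, not booster code; my_false_rmdir is a hypothetical name.

    #include <sys/cdefs.h>

    /* my_false_rmdir is the C-level name; the emitted symbol is "rmdir",
     * so this definition preempts libc's for the whole program. */
    int __REDIRECT (my_false_rmdir, (const char *__path), rmdir);

    int
    my_false_rmdir (const char *path)
    {
            /* intercept here; a real wrapper would forward non-boosted
             * paths via a dlsym (RTLD_NEXT, "rmdir") pointer. */
            (void) path;
            return 0;
    }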
-int
-booster_false_open (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags,
- mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-booster_false_open64 (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags, mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-vmp_creat (const char *pathname, mode_t mode)
-{
- int fd = -1;
- glusterfs_file_t fh = NULL;
-
- fh = glusterfs_creat (pathname, mode);
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Create failed: %s: %s",
- pathname, strerror (errno));
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create fd");
- goto close_out;
- }
-
- if ((booster_fd_unused_get (booster_fdtable, fh, fd)) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map unused fd");
- goto real_close_out;
- }
-
- return fd;
-
-real_close_out:
- real_close (fd);
- fd = -1;
-
-close_out:
- glusterfs_close (fh);
-
-out:
- return -1;
-}
-
-int __REDIRECT (booster_false_creat, (const char *pathname, mode_t mode),
- creat) __nonnull ((1));
-int __REDIRECT (booster_false_creat64, (const char *pathname, mode_t mode),
- creat64) __nonnull ((1));
-
-int
-booster_false_creat (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-int
-booster_false_creat64 (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat64 (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-/* pread */
-
-ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread: fd %d, count %lu, offset %lu"
- ,fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread64: fd %d, count %lu, offset %"
- PRIu64, fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-read (int fd, void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd;
-
- gf_log ("booster", GF_LOG_TRACE, "read: fd %d, count %lu", fd,
- (long unsigned)count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_read == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_read (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_read (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-readv (int fd, const struct iovec *vector, int count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "readv: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_readv == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readv (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_readv (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-write (int fd, const void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "write: fd %d, count %"GF_PRI_SIZET,
- fd, count);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_write (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_write (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-ssize_t
-writev (int fd, const struct iovec *vector, int count)
-{
- int ret = 0;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "writev: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_writev == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_writev (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_writev (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite: fd %d, count %"GF_PRI_SIZET
- ", offset %lu", fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite64: fd %d, count %"GF_PRI_SIZET
- ", offset %"PRIu64, fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-close (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "close: fd %d", fd);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- booster_fd_put (booster_fdtable, fd);
- ret = glusterfs_close (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- ret = real_close (fd);
-
- return ret;
-}
-
-#ifndef _LSEEK_DECLARED
-#define _LSEEK_DECLARED
-off_t
-lseek (int filedes, unsigned long offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %ld",
- filedes, offset);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek (filedes, offset, whence);
- }
-
- return ret;
-}
-#endif
-
-off_t
-lseek64 (int filedes, uint64_t offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %"PRIu64,
- filedes, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek64 (filedes, offset, whence);
- }
-
- return ret;
-}
-
-int
-dup (int oldfd)
-{
- int ret = -1, new_fd = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "dup: fd %d", oldfd);
- glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_fd = real_dup (oldfd);
-
- if (new_fd >=0 && glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = booster_fd_unused_get (booster_fdtable, glfs_fd,
- new_fd);
- fd_ref ((fd_t *)glfs_fd);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR,"Failed to map new fd");
- real_close (new_fd);
- }
- }
-
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return new_fd;
-}
-
-
-int
-dup2 (int oldfd, int newfd)
-{
- int ret = -1;
- glusterfs_file_t old_glfs_fd = NULL, new_glfs_fd = NULL;
-
- if (oldfd == newfd) {
- return newfd;
- }
-
- old_glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_glfs_fd = booster_fdptr_get (booster_fdtable, newfd);
-
- ret = real_dup2 (oldfd, newfd);
- if (ret >= 0) {
- if (new_glfs_fd) {
- glusterfs_close (new_glfs_fd);
- booster_fdptr_put (new_glfs_fd);
- booster_fd_put (booster_fdtable, newfd);
- new_glfs_fd = 0;
- }
-
- if (old_glfs_fd) {
- ret = booster_fd_unused_get (booster_fdtable,
- old_glfs_fd, newfd);
- fd_ref ((fd_t *)old_glfs_fd);
- if (ret == -1) {
- real_close (newfd);
- }
- }
- }
-
- if (old_glfs_fd) {
- booster_fdptr_put (old_glfs_fd);
- }
-
- if (new_glfs_fd) {
- booster_fdptr_put (new_glfs_fd);
- }
-
- return ret;
-}
-
-int
-mkdir (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkdir: path %s", pathname);
- ret = glusterfs_mkdir (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory created");
- return ret;
- }
-
- if (real_mkdir == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else
- ret = real_mkdir (pathname, mode);
-
- return ret;
-}
-
-int
-rmdir (const char *pathname)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "rmdir: path %s", pathname);
- ret = glusterfs_rmdir (pathname);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "rmdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory removed");
- return ret;
- }
-
- if (real_rmdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rmdir (pathname);
-
- return ret;
-}
-
-int
-chmod (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chmod: path %s", pathname);
- ret = glusterfs_chmod (pathname, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chmod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chmod succeeded");
- return ret;
- }
-
- if (real_chmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chmod (pathname, mode);
-
- return ret;
-}
-
-int
-chown (const char *pathname, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chown: path: %s", pathname);
- ret = glusterfs_chown (pathname, owner, group);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chown failed: %s\n",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chown succeeded");
- return ret;
- }
-
- if (real_chown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chown (pathname, owner, group);
-
- return ret;
-}
-
-int
-fchown (int fd, uid_t owner, gid_t group)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchown: fd %d, uid %d, gid %d", fd,
- owner, group);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchown (fd, owner, group);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchown (fh, owner, group);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-
-#define MOUNT_TABLE_HASH_SIZE 256
-
-
-static void booster_cleanup (void);
-static int
-booster_init (void)
-{
- char *booster_conf_path = NULL;
- int ret = -1;
- int pipefd[2];
-
- booster_fdtable = booster_fdtable_alloc ();
- if (!booster_fdtable) {
- fprintf (stderr, "cannot allocate fdtable: %s\n",
- strerror (errno));
- goto err;
- }
-
- if (pipe (pipefd) == -1) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Pipe creation failed:%s"
- , strerror (errno));
- goto err;
- }
-
- process_piped_fd = pipefd[0];
- real_close (pipefd[1]);
- /* libglusterfsclient-based VMPs should be initialized only
- * after the fd tables are, so that any fd-based syscalls made
- * while the sockets are being set up find a correctly
- * initialized table, get back a NULL handle, and fall back
- * to the real API.
- */
- booster_conf_path = getenv (BOOSTER_CONF_ENV_VAR);
- if (booster_conf_path != NULL) {
- if (strlen (booster_conf_path) > 0)
- ret = booster_configure (booster_conf_path);
- else {
- gf_log ("booster", GF_LOG_ERROR, "%s defined but empty,"
- " using default path: %s", BOOSTER_CONF_ENV_VAR,
- DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
- } else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, using default"
- " path: %s", BOOSTER_CONF_ENV_VAR,DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
-
- atexit (booster_cleanup);
- if (ret == 0)
- gf_log ("booster", GF_LOG_DEBUG, "booster is inited");
- return 0;
-
-err:
- /* Sure we return an error value here
- * but who cares about booster.
- */
- return -1;
-}
-
-
-static void
-booster_cleanup (void)
-{
- /* Ideally, we should be de-initing the fd-table here, but file
- * accesses through booster have been seen to continue while the
- * atexit-registered function runs. That means we cannot dealloc
- * the fd-table, since doing so could crash the check that decides
- * whether a given fd belongs to libc or to libglusterfsclient.
- * We should be satisfied with having cleaned up glusterfs contexts.
- */
- glusterfs_umount_all ();
- glusterfs_reset ();
-}
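booster_init and booster_cleanup above hang off two ordinary libc lifecycle hooks: the constructor attribute on booster_lib_init (declared near the top of this file) gets setup running before main(), and booster_init registers booster_cleanup with atexit (). A tiny stand-alone illustration of that pairing, with hypothetical names:

    #include <stdio.h>
    #include <stdlib.h>

    static void example_fini (void);

    /* Runs before the program's main() because of the constructor attribute. */
    static void __attribute__ ((constructor))
    example_init (void)
    {
            atexit (example_fini);          /* arrange matching teardown */
            fprintf (stderr, "library initialized\n");
    }

    static void
    example_fini (void)
    {
            fprintf (stderr, "library torn down\n");
    }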
-
-int
-fchmod (int fd, mode_t mode)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchmod: fd %d, mode: 0x%x", fd, mode);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchmod (fd, mode);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchmod (fh, mode);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-fsync (int fd)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsync: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsync == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fsync (fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsync (fh);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int __REDIRECT (booster_false_ftruncate, (int fd, off_t length),
- ftruncate);
-int __REDIRECT (booster_false_ftruncate64, (int fd, loff_t length),
- ftruncate64);
-
-int
-booster_false_ftruncate (int fd, off_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-booster_false_ftruncate64 (int fd, loff_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate64: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate64 (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-link (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_link (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Link failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "link call succeeded");
- return ret;
- }
-
- if (real_link == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_link (old, new);
-
- return ret;
-}
-
-int
-rename (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "rename: old: %s, new: %s", old, new);
- ret = glusterfs_rename (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Rename failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "Rename succeeded");
- return ret;
- }
-
- if (real_rename == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rename (old, new);
-
- return ret;
-}
-
-int
-utimes (const char *path, const struct timeval times[2])
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utimes: path %s", path);
- ret = glusterfs_utimes (path, times);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utimes failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utimes succeeded");
- return ret;
- }
-
- if (real_utimes == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utimes (path, times);
-
- return ret;
-}
-
-int
-utime (const char *path, const struct utimbuf *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utime: path %s", path);
- ret = glusterfs_utime (path, buf);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utime failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utime succeeded");
- return ret;
- }
-
- if (real_utime == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utime (path, buf);
-
- return ret;
-}
-
-int
-mknod (const char *path, mode_t mode, dev_t dev)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mknod: path %s", path);
- ret = glusterfs_mknod (path, mode, dev);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mknod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mknod succeeded");
- return ret;
- }
-
- if (real_mknod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mknod (path, mode, dev);
-
- return ret;
-}
-
-int
-mkfifo (const char *path, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkfifo: path %s", path);
- ret = glusterfs_mkfifo (path, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkfifo failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mkfifo succeeded");
- return ret;
- }
-
- if (real_mkfifo == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mkfifo (path, mode);
-
- return ret;
-}
-
-int
-unlink (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "unlink: path %s", path);
- ret = glusterfs_unlink (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "unlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "unlink succeeded");
- return ret;
- }
-
- if (real_unlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_unlink (path);
-
- return ret;
-}
-
-int
-symlink (const char *oldpath, const char *newpath)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "symlink: old: %s, new: %s",
- oldpath, newpath);
- ret = glusterfs_symlink (oldpath, newpath);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "symlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "symlink succeeded");
- return ret;
- }
-
- if (real_symlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_symlink (oldpath, newpath);
-
- return ret;
-}
-
-int
-readlink (const char *path, char *buf, size_t bufsize)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "readlink: path %s", path);
- ret = glusterfs_readlink (path, buf, bufsize);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "readlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "readlink succeeded");
- return ret;
- }
-
- if (real_readlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readlink (path, buf, bufsize);
-
- return ret;
-}
-
-char *
-realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "realpath: path %s", path);
- res = glusterfs_realpath (path, resolved_path);
- if ((res == NULL) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "realpath failed: %s",
- strerror (errno));
- return res;
- }
-
- if (res != NULL) {
- gf_log ("booster", GF_LOG_TRACE, "realpath succeeded");
- return res;
- }
-
- if (real_realpath == NULL) {
- errno = ENOSYS;
- res = NULL;
- } else
- res = real_realpath (path, resolved_path);
-
- return res;
-}
-
-#define BOOSTER_GL_DIR 1
-#define BOOSTER_POSIX_DIR 2
-
-struct booster_dir_handle {
- int type;
- void *dirh;
-};
-
-DIR *
-opendir (const char *path)
-{
- glusterfs_dir_t gdir = NULL;
- struct booster_dir_handle *bh = NULL;
- DIR *pdir = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "opendir: path: %s", path);
- bh = calloc (1, sizeof (struct booster_dir_handle));
- if (!bh) {
- gf_log ("booster", GF_LOG_ERROR, "memory allocation failed");
- errno = ENOMEM;
- goto out;
- }
-
- gdir = glusterfs_opendir (path);
- if (gdir) {
- gf_log ("booster", GF_LOG_TRACE, "Gluster dir opened");
- bh->type = BOOSTER_GL_DIR;
- bh->dirh = (void *)gdir;
- goto out;
- } else if ((!gdir) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Opendir failed");
- goto free_out;
- }
-
- if (real_opendir == NULL) {
- errno = ENOSYS;
- goto free_out;
- }
-
- pdir = real_opendir (path);
-
- if (pdir) {
- bh->type = BOOSTER_POSIX_DIR;
- bh->dirh = (void *)pdir;
- goto out;
- }
-
-free_out:
- if (bh) {
- free (bh);
- bh = NULL;
- }
-out:
- return (DIR *)bh;
-}
-
-int __REDIRECT (booster_false_readdir_r, (DIR *dir, struct dirent *entry,
- struct dirent **result), readdir_r) __nonnull ((1));
-int __REDIRECT (booster_false_readdir64_r, (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result), readdir64_r) __nonnull ((1));
-
-int
-booster_false_readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh, entry,
- result);
-
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-int
-booster_false_readdir64_r (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh,
- (struct dirent *)entry,
- (struct dirent **)result);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir64_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir64_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-struct dirent *
-__REDIRECT (booster_false_readdir, (DIR *dir), readdir) __nonnull ((1));
-
-struct dirent64 *
-__REDIRECT (booster_false_readdir64, (DIR *dir), readdir64) __nonnull ((1));
-
-struct dirent *
-booster_false_readdir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-struct dirent64 *
-booster_false_readdir64 (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent64 *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir64 == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir64 ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-int
-closedir (DIR *dh)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dh;
- int ret = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on gluster");
- ret = glusterfs_closedir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on posix");
- if (real_closedir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_closedir ((DIR *)bh->dirh);
- } else {
- errno = EBADF;
- }
-
- if (ret == 0) {
- free (bh);
- bh = NULL;
- }
-out:
- return ret;
-}
-
-/* The real stat functions reside in booster_stat.c to
- * prevent a clash with the stat* prototypes and functions
- * declared in sys/stat.h.
- */
-int
-booster_xstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat succeeded");
- goto out;
- }
-
- if (real___xstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___xstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_xstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat64: path: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat64 succeeded");
- goto out;
- }
-
- if (real___xstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___xstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_stat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat succeeded");
- goto out;
- }
-
- if (real_stat != NULL)
- ret = real_stat (path, sbuf);
- else if (real___xstat != NULL)
- ret = real___xstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_stat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat64 succeeded");
- goto out;
- }
-
- if (real_stat64 != NULL)
- ret = real_stat64 (path, sbuf);
- else if (real___xstat64 != NULL)
- ret = real___xstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat (int ver, int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___fxstat (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat64 (int ver, int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- ret = real___fxstat64 (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat (int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat != NULL)
- ret = real_fstat (fd, sbuf);
- else if (real___fxstat != NULL)
- ret = real___fxstat (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat64 (int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat64 != NULL)
- ret = real_fstat64 (fd, sbuf);
- else if (real___fxstat64 != NULL)
- /* Not sure how portable the use of 0 for the
- * version number is, but it works on glibc.
- * We need this because all of the above real*
- * function pointers have been observed to be
- * NULL; in that case, this is our last and only option.
- */
- ret = real___fxstat64 (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_lxstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat succeeded");
- goto out;
- }
-
- if (real___lxstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___lxstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lxstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat64 succeeded");
- goto out;
- }
-
- if (real___lxstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___lxstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lstat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat succeeded");
- goto out;
- }
-
- if (real_lstat != NULL)
- ret = real_lstat (path, sbuf);
- else if (real___lxstat != NULL)
- ret = real___lxstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_lstat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat64 succeeded");
- goto out;
- }
-
- if (real_lstat64 != NULL)
- ret = real_lstat64 (path, sbuf);
- else if (real___lxstat64 != NULL)
- ret = real___lxstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_statfs (const char *pathname, struct statfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statfs: path %s", pathname);
- ret = glusterfs_statfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs succeeded");
- goto out;
- }
-
- if (real_statfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statfs64 (const char *pathname, struct statfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statfs64: path %s", pathname);
- ret = glusterfs_statfs (pathname, (struct statfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs64 succeeded");
- goto out;
- }
-
- if (real_statfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs (const char *pathname, struct statvfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs: path %s", pathname);
- ret = glusterfs_statvfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs succeeded");
- goto out;
- }
-
- if (real_statvfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs64 (const char *pathname, struct statvfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs64: path %s", pathname);
- ret = glusterfs_statvfs (pathname, (struct statvfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs64 succeeded");
- goto out;
- }
-
- if (real_statvfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "getxattr: path %s, name %s", path,
- name);
- ret = glusterfs_getxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "getxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "getxattr succeeded");
- return ret;
- }
-
- if (real_getxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_getxattr (path, name, value, size);
-out:
- return ret;
-}
-
-
-ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr: path %s, name %s", path,
- name);
- ret = glusterfs_lgetxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lgetxattr failed: %s",
- strerror (errno));
-
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr succeeded");
- return ret;
- }
-
- if (real_lgetxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_lgetxattr (path, name, value, size);
-out:
- return ret;
-}
-
-int
-remove (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "remove: %s", path);
- ret = glusterfs_remove (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "remove failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "remove succeeded");
- goto out;
- }
-
- if (real_remove == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_remove (path);
-
-out:
- return ret;
-}
-
-int
-lchown (const char *path, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lchown: path %s", path);
- ret = glusterfs_lchown (path, owner, group);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lchown failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lchown succeeded");
- goto out;
- }
-
- if (real_lchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_lchown (path, owner, group);
-
-out:
- return ret;
-}
-
-void
-booster_rewinddir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on glusterfs");
- glusterfs_rewinddir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_rewinddir == NULL) {
- errno = ENOSYS;
- goto out;
- }
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on posix");
- real_rewinddir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-void
-booster_seekdir (DIR *dir, off_t offset)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "seekdir on glusterfs");
- glusterfs_seekdir ((glusterfs_dir_t)bh->dirh, offset);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_seekdir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "seekdir on posix");
- real_seekdir ((DIR *)bh->dirh, offset);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-off_t
-booster_telldir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- off_t offset = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "telldir on glusterfs");
- offset = glusterfs_telldir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_telldir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "telldir on posix");
- offset = real_telldir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return offset;
-}
-
-
-pid_t
-fork (void)
-{
- pid_t pid = 0;
- char child = 0;
-
- glusterfs_log_lock ();
- {
- pid = real_fork ();
- }
- glusterfs_log_unlock ();
-
- child = (pid == 0);
- if (child) {
- booster_cleanup ();
- booster_init ();
- }
-
- return pid;
-}
-
-ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile: in fd %d, out fd %d, offset"
- " %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd, *offset,
- count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile64: in fd %d, out fd %d,"
- " offset %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd,
- *offset, count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile64 (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-
-int
-fcntl (int fd, int cmd, ...)
-{
- va_list ap;
- int ret = -1;
- long arg = 0;
- struct flock *lock = NULL;
- glusterfs_file_t glfs_fd = 0;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- gf_log ("booster", GF_LOG_TRACE, "fcntl: fd %d, cmd %d", fd, cmd);
- switch (cmd) {
- case F_DUPFD:
- ret = dup (fd);
- break;
- /*
- * FIXME: Consider this case when implementing F_DUPFD, F_GETFD,
- * etc. in libglusterfsclient. Commenting it out for the time
- * being since it is defined only in Linux kernel versions
- * >= 2.6.24.
- */
- /* case F_DUPFD_CLOEXEC: */
- case F_GETFD:
- if (glfs_fd != NULL) {
- ret = booster_get_close_on_exec (booster_fdtable, fd)
- ? FD_CLOEXEC : 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_GETFL:
- case F_GETOWN:
- case F_GETSIG:
- case F_GETLEASE:
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd);
- }
- break;
-
- case F_SETFD:
- if (glfs_fd != NULL) {
- booster_set_close_on_exec (booster_fdtable, fd);
- ret = 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_SETFL:
- case F_SETOWN:
- case F_SETSIG:
- case F_SETLEASE:
- case F_NOTIFY:
- va_start (ap, cmd);
- arg = va_arg (ap, long);
- va_end (ap);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, arg);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, arg);
- }
- break;
-
- case F_GETLK:
- case F_SETLK:
- case F_SETLKW:
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (lock == NULL) {
- errno = EINVAL;
- goto out;
- }
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, lock);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, lock);
- }
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-chdir (const char *path)
-{
- int ret = -1;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "chdir: path %s", path);
-
- pthread_mutex_lock (&cwdlock);
- {
- res = glusterfs_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "getcwd failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- ret = glusterfs_chdir (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chdir failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chdir succeeded");
- goto unlock;
- }
-
- if (real_chdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto unlock;
- }
-
- ret = real_chdir (path);
- if (ret == -1) {
- glusterfs_chdir (cwd);
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return ret;
-}
-
-
-int
-fchdir (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_fchdir (fd);
- if (ret == 0) {
- res = real_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR,
- "getcwd failed (%s)",
- strerror (errno));
- ret = -1;
- } else {
- glusterfs_chdir (cwd);
- }
- }
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchdir (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-char *
-getcwd (char *buf, size_t size)
-{
- char *res = NULL;
-
- res = glusterfs_getcwd (buf, size);
- if ((res == NULL) && (errno == ENODEV)) {
- res = real_getcwd (buf, size);
- }
-
- return res;
-}
-
-
-int __REDIRECT (booster_false_truncate, (const char *path, off_t length),
- truncate) __nonnull ((1));
-int __REDIRECT (booster_false_truncate64, (const char *path, loff_t length),
- truncate64) __nonnull ((1));
-
-int
-booster_false_truncate (const char *path, off_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate: path (%s) length (%"PRIu64
- ")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate succeeded");
- goto out;
- }
-
- if (real_truncate != NULL)
- ret = real_truncate (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-booster_false_truncate64 (const char *path, loff_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate64: path (%s) length "
- "(%"PRIu64")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate64 succeeded");
- goto out;
- }
-
- if (real_truncate64 != NULL)
- ret = real_truncate64 (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-setxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "setxattr: path: %s", path);
- ret = glusterfs_setxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "setxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "setxattr succeeded");
- goto out;
- }
-
- if (real_setxattr != NULL)
- ret = real_setxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-lsetxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr: path: %s", path);
- ret = glusterfs_lsetxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lsetxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr succeeded");
- goto out;
- }
-
- if (real_lsetxattr != NULL)
- ret = real_lsetxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-fsetxattr (int fd, const char *name, const void *value, size_t size, int flags)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsetxattr: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsetxattr != NULL)
- ret = real_fsetxattr (fd, name, value, size, flags);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsetxattr (fh, name, value, size, flags);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-
-void
-booster_lib_init (void)
-{
-
- RESOLVE (open);
- RESOLVE (open64);
- RESOLVE (creat);
- RESOLVE (creat64);
-
- RESOLVE (read);
- RESOLVE (readv);
- RESOLVE (pread);
- RESOLVE (pread64);
-
- RESOLVE (write);
- RESOLVE (writev);
- RESOLVE (pwrite);
- RESOLVE (pwrite64);
-
- RESOLVE (lseek);
- RESOLVE (lseek64);
-
- RESOLVE (close);
-
- RESOLVE (dup);
- RESOLVE (dup2);
-
- RESOLVE (fork);
- RESOLVE (mkdir);
- RESOLVE (rmdir);
- RESOLVE (chmod);
- RESOLVE (chown);
- RESOLVE (fchmod);
- RESOLVE (fchown);
- RESOLVE (fsync);
- RESOLVE (ftruncate);
- RESOLVE (ftruncate64);
- RESOLVE (link);
- RESOLVE (rename);
- RESOLVE (utimes);
- RESOLVE (utime);
- RESOLVE (mknod);
- RESOLVE (mkfifo);
- RESOLVE (unlink);
- RESOLVE (symlink);
- RESOLVE (readlink);
- RESOLVE (realpath);
- RESOLVE (opendir);
- RESOLVE (readdir);
- RESOLVE (readdir64);
- RESOLVE (closedir);
- RESOLVE (__xstat);
- RESOLVE (__xstat64);
- RESOLVE (stat);
- RESOLVE (stat64);
- RESOLVE (__fxstat);
- RESOLVE (__fxstat64);
- RESOLVE (fstat);
- RESOLVE (fstat64);
- RESOLVE (__lxstat);
- RESOLVE (__lxstat64);
- RESOLVE (lstat);
- RESOLVE (lstat64);
- RESOLVE (statfs);
- RESOLVE (statfs64);
- RESOLVE (statvfs);
- RESOLVE (statvfs64);
- RESOLVE (getxattr);
- RESOLVE (lgetxattr);
- RESOLVE (remove);
- RESOLVE (lchown);
- RESOLVE (rewinddir);
- RESOLVE (seekdir);
- RESOLVE (telldir);
- RESOLVE (sendfile);
- RESOLVE (sendfile64);
- RESOLVE (readdir_r);
- RESOLVE (readdir64_r);
- RESOLVE (fcntl);
- RESOLVE (chdir);
- RESOLVE (fchdir);
- RESOLVE (getcwd);
- RESOLVE (truncate);
- RESOLVE (truncate64);
- RESOLVE (setxattr);
- RESOLVE (lsetxattr);
- RESOLVE (fsetxattr);
-
- /* This must be called after resolving real functions
- * above so that the socket-based I/O calls in libglusterfsclient
- * can fall back to a non-NULL real_XXX function pointer.
- * Calling booster_init before resolving the names above
- * results in seg-faults because the function symbols above are NULL.
- */
- booster_init ();
-}
-
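
The comment at the end of booster_lib_init() hinges on the RESOLVE() calls filling in the real_XXX pointers before booster_init() runs. The macro itself is defined elsewhere in the deleted tree, so the following is only an editorial sketch, assuming the conventional dlsym(RTLD_NEXT, ...) technique, of how one such libc symbol is captured by an LD_PRELOAD interposer; the name real_write_fn and the build hints are illustrative, not GlusterFS APIs.

/* Editorial sketch (not from the deleted sources): the dlsym-based
 * resolution pattern that the RESOLVE() calls above rely on.  Build as
 * a shared object, link with -ldl, and load it with LD_PRELOAD. */
#define _GNU_SOURCE
#include <dlfcn.h>
#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

static ssize_t (*real_write_fn) (int fd, const void *buf, size_t count);

ssize_t
write (int fd, const void *buf, size_t count)
{
        if (real_write_fn == NULL) {
                /* RTLD_NEXT returns the next definition of "write" in the
                 * search order, i.e. the libc one shadowed by this wrapper. */
                real_write_fn = (ssize_t (*)(int, const void *, size_t))
                                dlsym (RTLD_NEXT, "write");
        }

        if (real_write_fn == NULL) {
                /* Same fallback convention as the wrappers in the file above. */
                errno = ENOSYS;
                return -1;
        }

        return real_write_fn (fd, buf, count);
}
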
diff --git a/booster/src/booster_fstab.c b/booster/src/booster_fstab.c
deleted file mode 100644
index 202249cad..000000000
--- a/booster/src/booster_fstab.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995-2000, 2001, 2002, 2003, 2006
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <alloca.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include "booster_fstab.h"
-#include <stdlib.h>
-#include <libglusterfsclient.h>
-#include <errno.h>
-
-/* The default timeout for inode and stat cache. */
-#define BOOSTER_DEFAULT_ATTR_TIMEO 5 /* In Secs */
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-glusterfs_fstab_t *
-glusterfs_fstab_init (const char *file, const char *mode)
-{
- glusterfs_fstab_t *handle = NULL;
- handle = calloc (1, sizeof (glusterfs_fstab_t));
- if (!handle) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Memory allocation"
- " failed");
- goto out;
- }
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "FSTAB file: %s", file);
- FILE *result = fopen (file, mode);
- if (result != NULL) {
- handle->fp = result;
- } else {
- gf_log ("booster-fstab", GF_LOG_ERROR, "FSTAB file open failed:"
- " %s", strerror (errno));
- free (handle);
- handle = NULL;
- }
-
-out:
-
- return handle;
-}
-
-int
-glusterfs_fstab_close (glusterfs_fstab_t *h)
-{
- if (!h)
- return -1;
-
- if (h->fp)
- fclose (h->fp);
-
- return 0;
-}
-
-/* Since the values in a line are separated by spaces, a name cannot
- contain a space. Therefore some programs encode spaces in names
- by the strings "\040". We undo the encoding when reading an entry.
- The decoding happens in place. */
-static char *
-decode_name (char *buf)
-{
- char *rp = buf;
- char *wp = buf;
-
- do
- if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '4'
- && rp[3] == '0')
- {
- /* \040 is a SPACE. */
- *wp++ = ' ';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '1')
- {
- /* \011 is a TAB. */
- *wp++ = '\t';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '2')
- {
- /* \012 is a NEWLINE. */
- *wp++ = '\n';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '\\')
- {
- /* We have to escape \\ to be able to represent all
- * characters. */
- *wp++ = '\\';
- rp += 1;
- }
- else if (rp[0] == '\\' && rp[1] == '1' && rp[2] == '3'
- && rp[3] == '4')
- {
- /* \134 is also \\. */
- *wp++ = '\\';
- rp += 3;
- }
- else
- *wp++ = *rp;
- while (*rp++ != '\0');
-
- return buf;
-}
-
-
-/* Read one mount table entry from STREAM. Returns a pointer to storage
- reused on the next call, or null for EOF or error (use feof/ferror to
- check). */
-struct glusterfs_mntent *
-__glusterfs_fstab_getent (FILE *stream, struct glusterfs_mntent *mp,
- char *buffer, int bufsiz)
-{
- char *cp;
- char *head;
-
- do
- {
- char *end_ptr;
-
- if (fgets (buffer, bufsiz, stream) == NULL)
- {
- return NULL;
- }
-
- end_ptr = strchr (buffer, '\n');
- if (end_ptr != NULL) /* chop newline */
- *end_ptr = '\0';
- else
- {
- /* The whole line was not read; consume the rest now
- * but discard it. */
- char tmp[1024];
- while (fgets (tmp, sizeof tmp, stream) != NULL)
- if (strchr (tmp, '\n') != NULL)
- break;
- }
-
- head = buffer + strspn (buffer, " \t");
- /* skip empty lines and comment lines: */
- }
- while (head[0] == '\0' || head[0] == '#');
-
- cp = strsep (&head, " \t");
- mp->mnt_fsname = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_dir = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_type = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_opts = cp != NULL ? decode_name (cp) : (char *) "";
- switch (head ? sscanf (head, " %d %d ", &mp->mnt_freq,
- &mp->mnt_passno) : 0)
- {
- case 0:
- mp->mnt_freq = 0;
- case 1:
- mp->mnt_passno = 0;
- case 2:
- break;
- }
-
- return mp;
-}
-
-struct glusterfs_mntent *
-glusterfs_fstab_getent (glusterfs_fstab_t *h)
-{
- if (!h)
- return NULL;
-
- if (!h->fp)
- return NULL;
-
- return __glusterfs_fstab_getent (h->fp, &h->tmpent, h->buf,
- GF_MNTENT_BUFSIZE);
-}
-
-/* We have to use an encoding for names if they contain spaces or tabs.
- To be able to represent all characters we also have to escape the
- backslash itself. This "function" must be a macro since we use
- `alloca'. */
-#define encode_name(name) \
- do { \
- const char *rp = name; \
- \
- while (*rp != '\0') \
- if (*rp == ' ' || *rp == '\t' || *rp == '\\') \
- break; \
- else \
- ++rp; \
- \
- if (*rp != '\0') \
- { \
- /* In the worst case the length of the string \
- * can increase to four times the current \
- * length. */ \
- char *wp; \
- \
- rp = name; \
- name = wp = (char *) alloca (strlen (name) * 4 + 1); \
- \
- do { \
- if (*rp == ' ') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '4'; \
- *wp++ = '0'; \
- } \
- else if (*rp == '\t') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '1'; \
- } \
- else if (*rp == '\n') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '2'; \
- } \
- else if (*rp == '\\') \
- { \
- *wp++ = '\\'; \
- *wp++ = '\\'; \
- } \
- else \
- *wp++ = *rp; \
- } while (*rp++ != '\0'); \
- } \
- } while (0) \
-
-
-int
-glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt)
-{
- struct glusterfs_mntent mntcopy = *mnt;
- if (!h)
- return -1;
-
- if (!h->fp)
- return -1;
-
- if (fseek (h->fp, 0, SEEK_END))
- return -1;
-
- /* Encode spaces and tabs in the names. */
- encode_name (mntcopy.mnt_fsname);
- encode_name (mntcopy.mnt_dir);
- encode_name (mntcopy.mnt_type);
- encode_name (mntcopy.mnt_opts);
-
- return (fprintf (h->fp, "%s %s %s %s %d %d\n",
- mntcopy.mnt_fsname,
- mntcopy.mnt_dir,
- mntcopy.mnt_type,
- mntcopy.mnt_opts,
- mntcopy.mnt_freq,
- mntcopy.mnt_passno)
- < 0 ? 1 : 0);
-}
-
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-char *
-glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt, const char *opt)
-{
- const size_t optlen = strlen (opt);
- char *rest = mnt->mnt_opts, *p;
-
- while ((p = strstr (rest, opt)) != NULL)
- {
- if ((p == rest || p[-1] == ',')
- && (p[optlen] == '\0' || p[optlen] == '=' || p[optlen] == ','))
- return p;
-
- rest = strchr (p, ',');
- if (rest == NULL)
- break;
- ++rest;
- }
-
- return NULL;
-}
-
-void
-clean_init_params (glusterfs_init_params_t *ipars)
-{
- if (!ipars)
- return;
-
- if (ipars->volume_name)
- free (ipars->volume_name);
-
- if (ipars->specfile)
- free (ipars->specfile);
-
- if (ipars->logfile)
- free (ipars->logfile);
-
- if (ipars->loglevel)
- free (ipars->loglevel);
-
- return;
-}
-
-char *
-get_option_value (char *opt)
-{
- char *val = NULL;
- char *saveptr = NULL;
- char *copy_opt = NULL;
- char *retval = NULL;
-
- copy_opt = strdup (opt);
-
- /* Get the = before the value of the option. */
- val = index (copy_opt, '=');
- if (val) {
- /* Move past '=' to the start of the value */
- ++val;
-
- /* Now, to create a '\0'-delimited string out of the
- * options string, find where the next option starts,
- * i.e. the next ','.
- */
- saveptr = index (val, ',');
- if (saveptr)
- *saveptr = '\0';
- retval = strdup (val);
- }
-
- free (copy_opt);
-
- return retval;
-}
-
-void
-booster_mount (struct glusterfs_mntent *ent)
-{
- char *opt = NULL;
- glusterfs_init_params_t ipars;
- time_t timeout = BOOSTER_DEFAULT_ATTR_TIMEO;
- char *timeostr = NULL;
- char *endptr = NULL;
-
- if (!ent)
- return;
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "Mount entry: volfile: %s,"
- " VMP: %s, Type: %s, Options: %s", ent->mnt_fsname,
- ent->mnt_dir, ent->mnt_type, ent->mnt_opts);
- if ((strcmp (ent->mnt_type, "glusterfs") != 0)) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Type is not glusterfs");
- return;
- }
-
- memset (&ipars, 0, sizeof (glusterfs_init_params_t));
- if (ent->mnt_fsname)
- ipars.specfile = strdup (ent->mnt_fsname);
-
- opt = glusterfs_fstab_hasoption (ent, "subvolume");
- if (opt)
- ipars.volume_name = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-file");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "logfile");
-
- if (opt)
- ipars.logfile = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-level");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "loglevel");
-
- if (opt)
- ipars.loglevel = get_option_value (opt);
-
- /* Attribute cache timeout */
- opt = glusterfs_fstab_hasoption (ent, "attr_timeout");
- if (opt) {
- timeostr = get_option_value (opt);
- if (timeostr)
- timeout = strtol (timeostr, &endptr, 10);
- }
-
- ipars.lookup_timeout = timeout;
- ipars.stat_timeout = timeout;
-
- if ((glusterfs_mount (ent->mnt_dir, &ipars)) == -1)
- gf_log ("booster-fstab", GF_LOG_ERROR, "VMP mounting failed");
-
- clean_init_params (&ipars);
-}
-
-int
-booster_configure (char *confpath)
-{
- int ret = -1;
- glusterfs_fstab_t *handle = NULL;
- struct glusterfs_mntent *ent = NULL;
-
- if (!confpath)
- goto out;
-
- handle = glusterfs_fstab_init (confpath, "r");
- if (!handle)
- goto out;
-
- while ((ent = glusterfs_fstab_getent (handle)) != NULL)
- booster_mount (ent);
-
- glusterfs_fstab_close (handle);
- ret = 0;
-out:
- return ret;
-}
-
-
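
booster_configure() above is the only in-tree consumer of this parser. As a standalone illustration (main(), the sample path and the printed format are invented here; only the types and functions come from the deleted booster_fstab.h), reading such a file looks roughly like this:

/* Editorial sketch: enumerate the entries of a booster fstab-style file
 * with the deleted API.  The path below is only a placeholder. */
#include <stdio.h>
#include "booster_fstab.h"

int
main (void)
{
        glusterfs_fstab_t       *h   = NULL;
        struct glusterfs_mntent *ent = NULL;

        h = glusterfs_fstab_init ("/tmp/booster.fstab", "r");
        if (!h)
                return 1;

        /* Each entry carries the volfile (mnt_fsname), the virtual mount
         * point (mnt_dir), the type and the comma-separated options. */
        while ((ent = glusterfs_fstab_getent (h)) != NULL)
                printf ("%s on %s type %s (%s)\n", ent->mnt_fsname,
                        ent->mnt_dir, ent->mnt_type, ent->mnt_opts);

        glusterfs_fstab_close (h);
        return 0;
}
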
diff --git a/booster/src/booster_fstab.h b/booster/src/booster_fstab.h
deleted file mode 100644
index 9bab04c5a..000000000
--- a/booster/src/booster_fstab.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef GLUSTERFS_FSTAB_MNTENT_H
-#define GLUSTERFS_FSTAB_MNTENT_H 1
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-
-/* General filesystem types. */
-#define GF_MNTTYPE_IGNORE "ignore" /* Ignore this entry. */
-#define GF_MNTTYPE_NFS "nfs" /* Network file system. */
-#define GF_MNTTYPE_SWAP "swap" /* Swap device. */
-
-
-/* Generic mount options. */
-#define GF_MNTOPT_DEFAULTS "defaults" /* Use all default options. */
-#define GF_MNTOPT_RO "ro" /* Read only. */
-#define GF_MNTOPT_RW "rw" /* Read/write. */
-#define GF_MNTOPT_SUID "suid" /* Set uid allowed. */
-#define GF_MNTOPT_NOSUID "nosuid" /* No set uid allowed. */
-#define GF_MNTOPT_NOAUTO "noauto" /* Do not auto mount. */
-
-
-/* Structure describing a mount table entry. */
-struct glusterfs_mntent
-{
- char *mnt_fsname; /* Device or server for filesystem. */
- char *mnt_dir; /* Directory mounted on. */
- char *mnt_type; /* Type of filesystem: ufs, nfs, etc. */
- char *mnt_opts; /* Comma-separated options for fs. */
- int mnt_freq; /* Dump frequency (in days). */
- int mnt_passno; /* Pass number for `fsck'. */
-};
-
-#define GF_MNTENT_BUFSIZE 1024
-typedef struct glusterfs_fstab_handle {
- FILE *fp;
- char buf[GF_MNTENT_BUFSIZE];
- struct glusterfs_mntent tmpent;
-}glusterfs_fstab_t;
-
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-extern glusterfs_fstab_t *glusterfs_fstab_init (const char *file,
- const char *mode);
-
-extern struct glusterfs_mntent *glusterfs_fstab_getent (glusterfs_fstab_t *h);
-
-/* Write the mount table entry described by MNT to STREAM.
- Return zero on success, nonzero on failure. */
-extern int glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt);
-
-/* Close a stream opened with `glusterfs_fstab_init'. */
-extern int glusterfs_fstab_close (glusterfs_fstab_t *h);
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-extern char *glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt,
- const char *opt);
-
-#endif
diff --git a/booster/src/booster_stat.c b/booster/src/booster_stat.c
deleted file mode 100644
index 8f76cfe37..000000000
--- a/booster/src/booster_stat.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-
-extern int
-booster_stat (const char *path, void *buf);
-
-extern int
-booster_stat64 (const char *path, void *buf);
-
-extern int
-booster_xstat (int ver, const char *path, void *buf);
-
-extern int
-booster_xstat64 (int ver, const char *path, void *buf);
-
-extern int
-booster_fxstat (int ver, int fd, void *buf);
-extern int
-booster_fxstat64 (int ver, int fd, void *buf);
-extern int
-booster_fstat (int fd, void *buf);
-extern int
-booster_fstat64 (int fd, void *buf);
-
-extern int
-booster_lstat (const char *path, void *buf);
-extern int
-booster_lstat64 (const char *path, void *buf);
-extern int
-booster_lxstat (int ver, const char *path, void *buf);
-extern int
-booster_lxstat64 (int ver, const char *path, void *buf);
-
-
-extern int
-booster_statfs (const char *path, void *buf);
-extern int
-booster_statfs64 (const char *path, void *buf);
-
-extern int
-booster_statvfs (const char *path, void *buf);
-
-extern int
-booster_statvfs64 (const char *path, void *buf);
-
-extern void *
-booster_readdir (void *dir);
-
-extern void
-booster_rewinddir (void *dir);
-
-extern void
-booster_seekdir (void *dir, off_t offset);
-
-extern off_t
-booster_telldir (void *dir);
-
-int
-stat (const char *path, void *buf)
-{
- return booster_stat (path, buf);
-}
-
-int
-stat64 (const char *path, void *buf)
-{
- return booster_stat64 (path, buf);
-}
-
-int
-__xstat (int ver, const char *path, void *buf)
-{
- return booster_xstat (ver, path, buf);
-}
-
-int
-__xstat64 (int ver, const char *path, void *buf)
-{
- return booster_xstat64 (ver, path, buf);
-}
-
-int
-__fxstat (int ver, int fd, void *buf)
-{
- return booster_fxstat (ver, fd, buf);
-}
-
-int
-__fxstat64 (int ver, int fd, void *buf)
-{
- return booster_fxstat64 (ver, fd, buf);
-}
-
-int
-fstat (int fd, void *buf)
-{
- return booster_fstat (fd, buf);
-}
-
-int
-fstat64 (int fd, void *buf)
-{
- return booster_fstat64 (fd, buf);
-}
-
-int
-lstat (const char *path, void *buf)
-{
- return booster_lstat (path, buf);
-}
-
-int
-lstat64 (const char *path, void *buf)
-{
- return booster_lstat64 (path, buf);
-}
-
-int
-__lxstat (int ver, const char *path, void *buf)
-{
- return booster_lxstat (ver, path, buf);
-}
-
-int
-__lxstat64 (int ver, const char *path, void *buf)
-{
- return booster_lxstat64 (ver, path, buf);
-}
-
-int
-statfs (const char *pathname, void *buf)
-{
- return booster_statfs (pathname, buf);
-}
-
-int
-statfs64 (const char *pathname, void *buf)
-{
- return booster_statfs64 (pathname, buf);
-}
-
-int
-statvfs (const char *pathname, void *buf)
-{
- return booster_statvfs (pathname, buf);
-}
-
-int
-statvfs64 (const char *pathname, void *buf)
-{
- return booster_statvfs64 (pathname, buf);
-}
-
-void
-rewinddir (void *dir)
-{
- return booster_rewinddir (dir);
-}
-
-void
-seekdir (void *dir, off_t offset)
-{
- return booster_seekdir (dir, offset);
-}
-
-off_t
-telldir (void *dir)
-{
- return booster_telldir (dir);
-}
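
booster_stat.c keeps thin aliases for both the plain stat family and the glibc __xstat/__fxstat/__lxstat entry points. That duplication matters because, on the glibc versions this code targeted, a user-level stat() call is typically compiled as an inline wrapper that ends up in __xstat(), so an interposing library has to export both names. The caller-side program below is an editorial illustration, not part of the tree.

/* Editorial sketch: why both stat() and __xstat() are interposed.
 * With older glibc headers, this stat() call is lowered to
 * __xstat(_STAT_VER, path, &sb), so a preloaded library must cover
 * both symbols to catch every caller. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

int
main (void)
{
        struct stat sb;

        if (stat ("/etc/hostname", &sb) == 0)
                printf ("size: %lld bytes\n", (long long) sb.st_size);

        return 0;
}
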
diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
index b76a9efd3..216d1bb55 100644
--- a/cli/src/Makefile.am
+++ b/cli/src/Makefile.am
@@ -2,23 +2,26 @@ sbin_PROGRAMS = gluster
gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c \
cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
- cli-cmd-system.c cli-cmd-misc.c
+ cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c cli-cmd-snapshot.c
gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
$(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(GF_GLUSTERFS_LIBS) $(XML_LIBS)
-gluster_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
+gluster_LDFLAGS = $(GF_LDFLAGS)
noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
-I$(top_srcdir)/rpc/xdr/src\
-DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS)\
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
-DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"\
+ $(XML_CPPFLAGS)
+AM_CFLAGS = -Wall $(GF_GLUSTERFS_CFLAGS)
CLEANFILES =
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
index 40f419cde..566d7c978 100644
--- a/cli/src/cli-cmd-misc.c
+++ b/cli/src/cli-cmd-misc.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -41,6 +31,8 @@ extern struct cli_cmd volume_cmds[];
extern struct cli_cmd cli_probe_cmds[];
extern struct cli_cmd cli_log_cmds[];
extern struct cli_cmd cli_system_cmds[];
+extern struct cli_cmd cli_bd_cmds[];
+extern struct cli_cmd snapshot_cmds[];
struct cli_cmd cli_misc_cmds[];
int
@@ -55,7 +47,8 @@ cli_cmd_display_help (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
struct cli_cmd *cmd[] = {volume_cmds, cli_probe_cmds,
- cli_misc_cmds, NULL};
+ cli_misc_cmds, snapshot_cmds,
+ NULL};
struct cli_cmd *cmd_ind = NULL;
int i = 0;
@@ -80,6 +73,10 @@ struct cli_cmd cli_misc_cmds[] = {
cli_cmd_display_help,
"display command options"},
+ { "exit",
+ cli_cmd_quit_cbk,
+ "exit"},
+
{ NULL, NULL, NULL }
};
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index d3cb1240f..5ab208b8f 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -36,6 +26,37 @@
#include "protocol-common.h"
#include "cli1-xdr.h"
+#define MAX_SNAP_DESCRIPTION_LEN 1024
+
+struct snap_config_opt_vals_ snap_confopt_vals[] = {
+ {.op_name = "snap-max-hard-limit",
+ .question = "Changing snapshot-max-hard-limit "
+ "will lead to deletion of snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"
+ },
+ {.op_name = "snap-max-soft-limit",
+ .question = "Changing snapshot-max-soft-limit "
+ "will lead to deletion of snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"
+ },
+ {.op_name = "both",
+ .question = "Changing snapshot-max-hard-limit & "
+ "snapshot-max-soft-limit will lead to "
+ "deletion of snapshots if they exceed "
+ "the new limit.\nDo you want to continue?"
+ },
+ {.op_name = NULL,
+ }
+};
+
+enum cli_snap_config_set_types {
+ GF_SNAP_CONFIG_SET_HARD = 0,
+ GF_SNAP_CONFIG_SET_SOFT = 1,
+ GF_SNAP_CONFIG_SET_BOTH = 2,
+};
+typedef enum cli_snap_config_set_types cli_snap_config_set_types;
static const char *
id_sel (void *wcon)
@@ -75,21 +96,20 @@ cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
brick_list_len++;
while (brick_index < wordcount) {
if (validate_brick_name ((char *)words[brick_index])) {
- cli_out ("Wrong brick type: %s, use <HOSTNAME>:"
+ cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
"<export-dir-abs-path>", words[brick_index]);
ret = -1;
goto out;
} else {
delimiter = strrchr (words[brick_index], ':');
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
if ((brick_list_len + strlen (words[brick_index]) + 1) > sizeof (brick_list)) {
- gf_log ("cli", GF_LOG_ERROR,
- "total brick list is larger than a request "
- "can take (brick_count %d)", *brick_count);
+ cli_err ("Total brick list is larger than a request. "
+ "Can take (brick_count %d)", *brick_count);
ret = -1;
goto out;
}
@@ -109,16 +129,18 @@ cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
}
if (!(strcmp (host_name, "localhost") &&
- strcmp (host_name, "127.0.0.1"))) {
- cli_out ("Please provide a valid hostname/ip other "
- "than localhost or 127.0.0.1");
+ strcmp (host_name, "127.0.0.1") &&
+ strncmp (host_name, "0.", 2))) {
+ cli_err ("Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
ret = -1;
GF_FREE (tmp_host);
goto out;
}
- if (!valid_internet_address (host_name)) {
- cli_out ("internet address '%s' does not comform to "
- "standards", host_name);
+ if (!valid_internet_address (host_name, _gf_false)) {
+ cli_err ("internet address '%s' does not conform to "
+ "standards", host_name);
}
GF_FREE (tmp_host);
tmp_list = gf_strdup (brick_list + 1);
@@ -132,7 +154,7 @@ cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
strtok_r (tmp_list, " ", &tmpptr);
if (!(strcmp (tmp_list, words[brick_index]))) {
ret = -1;
- cli_out ("Found duplicate"
+ cli_err ("Found duplicate"
" exports %s",words[brick_index]);
goto out;
}
@@ -150,8 +172,7 @@ cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
if (!*bricks)
ret = -1;
out:
- if (free_list_ptr)
- GF_FREE (free_list_ptr);
+ GF_FREE (free_list_ptr);
return ret;
}
@@ -171,10 +192,18 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
char *bricks = NULL;
int32_t brick_count = 0;
char *opwords[] = { "replica", "stripe", "transport", NULL };
+
+ char *invalid_volnames[] = {"volume", "type", "subvolumes", "option",
+ "end-volume", "all", "volume_not_in_ring",
+ "description", "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit", NULL};
char *w = NULL;
int op_count = 0;
int32_t replica_count = 1;
int32_t stripe_count = 1;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -196,9 +225,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
if (volname[0] == '-')
goto out;
- if (!strcmp (volname, "all")) {
- cli_out ("\"all\" cannot be the name of a volume.");
- goto out;
+ for (i = 0; invalid_volnames[i]; i++) {
+ if (!strcmp (volname, invalid_volnames[i])) {
+ cli_err ("\"%s\" cannot be the name of a volume.",
+ volname);
+ goto out;
+ }
}
if (strchr (volname, '/'))
@@ -229,7 +261,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
switch (type) {
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
case GF_CLUSTER_TYPE_REPLICATE:
- cli_out ("replica option given twice");
+ cli_err ("replica option given twice");
goto out;
case GF_CLUSTER_TYPE_NONE:
type = GF_CLUSTER_TYPE_REPLICATE;
@@ -245,7 +277,8 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
}
replica_count = strtol (words[index+1], NULL, 0);
if (replica_count < 2) {
- cli_out ("replica count should be greater than 1");
+ cli_err ("replica count should be greater"
+ " than 1");
ret = -1;
goto out;
}
@@ -259,7 +292,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
switch (type) {
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
case GF_CLUSTER_TYPE_STRIPE:
- cli_out ("stripe option given twice");
+ cli_err ("stripe option given twice");
goto out;
case GF_CLUSTER_TYPE_NONE:
type = GF_CLUSTER_TYPE_STRIPE;
@@ -274,7 +307,8 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
}
stripe_count = strtol (words[index+1], NULL, 0);
if (stripe_count < 2) {
- cli_out ("stripe count should be greater than 1");
+ cli_err ("stripe count should be greater"
+ " than 1");
ret = -1;
goto out;
}
@@ -286,7 +320,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
} else if ((strcmp (w, "transport")) == 0) {
if (trans_type) {
- cli_out ("'transport' option given more"
+ cli_err ("'transport' option given more"
" than one time");
goto out;
}
@@ -304,7 +338,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
index += 2;
- } else {
+ } else {
GF_ASSERT (!"opword mismatch");
ret = -1;
goto out;
@@ -327,7 +361,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
brick_index = index;
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
+ if (strcmp (words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ ret = cli_cmd_bricks_parse (words, wc, brick_index, &bricks,
&brick_count);
if (ret)
goto out;
@@ -335,20 +374,20 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
/* If brick-count is not valid when replica or stripe is
given, exit here */
if (!brick_count) {
- cli_out ("No bricks specified");
+ cli_err ("No bricks specified");
ret = -1;
goto out;
}
if (brick_count % sub_count) {
if (type == GF_CLUSTER_TYPE_STRIPE)
- cli_out ("number of bricks is not a multiple of "
+ cli_err ("number of bricks is not a multiple of "
"stripe count");
else if (type == GF_CLUSTER_TYPE_REPLICATE)
- cli_out ("number of bricks is not a multiple of "
+ cli_err ("number of bricks is not a multiple of "
"replica count");
else
- cli_out ("number of bricks given doesn't match "
+ cli_err ("number of bricks given doesn't match "
"required count");
ret = -1;
@@ -367,7 +406,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
ret = dict_set_dynstr (dict, "transport", trans_type);
if (ret)
goto out;
-
+ trans_type = NULL;
ret = dict_set_dynstr (dict, "bricks", bricks);
if (ret)
@@ -377,6 +416,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
if (ret)
goto out;
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
*options = dict;
out:
@@ -385,6 +428,9 @@ out:
if (dict)
dict_destroy (dict);
}
+
+ GF_FREE (trans_type);
+
return ret;
}
@@ -498,7 +544,7 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
goto out;
if (!strcmp (volname, "all")) {
- cli_out ("\"all\" cannot be the name of a volume.");
+ cli_err ("\"all\" cannot be the name of a volume.");
goto out;
}
@@ -523,16 +569,26 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
goto out;
}
- if ((strcmp (w, "enable")) == 0 && wordcount == 4) {
- type = GF_QUOTA_OPTION_TYPE_ENABLE;
- ret = 0;
- goto set_type;
+ if (strcmp (w, "enable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_ENABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
}
- if (strcmp (w, "disable") == 0 && wordcount == 4) {
- type = GF_QUOTA_OPTION_TYPE_DISABLE;
- ret = 0;
- goto set_type;
+ if (strcmp (w, "disable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_DISABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
}
if (strcmp (w, "limit-usage") == 0) {
@@ -544,7 +600,7 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
type = GF_QUOTA_OPTION_TYPE_LIMIT_USAGE;
if (words[4][0] != '/') {
- cli_out ("Please enter absolute path");
+ cli_err ("Please enter absolute path");
return -2;
}
@@ -553,14 +609,14 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
goto out;
if (!words[5]) {
- cli_out ("Please enter the limit value to be set");
+ cli_err ("Please enter the limit value to be set");
return -2;
}
ret = gf_string2bytesize (words[5], &value);
if (ret != 0) {
- cli_out ("Please enter a correct value");
+ cli_err ("Please enter a correct value");
return -1;
}
@@ -579,7 +635,7 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
type = GF_QUOTA_OPTION_TYPE_REMOVE;
if (words[4][0] != '/') {
- cli_out ("Please enter absolute path");
+ cli_err ("Please enter absolute path");
return -2;
}
@@ -635,17 +691,117 @@ out:
return ret;
}
+static inline gf_boolean_t
+cli_is_key_spl (char *key)
+{
+ return (strcmp (key, "group") == 0);
+}
+
+#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
+static int
+cli_add_key_group (dict_t *dict, char *key, char *value, char **op_errstr)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char iter_key[1024] = {0,};
+ char iter_val[1024] = {0,};
+ char *saveptr = NULL;
+ char *tok_key = NULL;
+ char *tok_val = NULL;
+ char *dkey = NULL;
+ char *dval = NULL;
+ char *tagpath = NULL;
+ char *buf = NULL;
+ char line[PATH_MAX + 256] = {0,};
+ char errstr[2048] = "";
+ FILE *fp = NULL;
+
+ ret = gf_asprintf (&tagpath, "%s/groups/%s",
+ GLUSTERD_DEFAULT_WORKDIR, value);
+ if (ret == -1) {
+ tagpath = NULL;
+ goto out;
+ }
+
+ fp = fopen (tagpath, "r");
+ if (!fp) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "Unable to open file '%s'."
+ " Error: %s", tagpath, strerror (errno));
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+
+ opt_count = 0;
+ buf = line;
+ while (fscanf (fp, "%s", buf) != EOF) {
+
+ opt_count++;
+ tok_key = strtok_r (line, "=", &saveptr);
+ tok_val = strtok_r (NULL, "=", &saveptr);
+ if (!tok_key || !tok_val) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "'%s' file format "
+ "not valid.", tagpath);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+
+ snprintf (iter_key, sizeof (iter_key), "key%d", opt_count);
+ dkey = gf_strdup (tok_key);
+ ret = dict_set_dynstr (dict, iter_key, dkey);
+ if (ret)
+ goto out;
+ dkey = NULL;
+
+ snprintf (iter_val, sizeof (iter_val), "value%d", opt_count);
+ dval = gf_strdup (tok_val);
+ ret = dict_set_dynstr (dict, iter_val, dval);
+ if (ret)
+ goto out;
+ dval = NULL;
+
+ }
+
+ if (!opt_count) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "'%s' file format "
+ "not valid.", tagpath);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "count", opt_count);
+out:
+
+ GF_FREE (tagpath);
+
+ if (ret) {
+ GF_FREE (dkey);
+ GF_FREE (dval);
+ }
+
+ if (fp)
+ fclose (fp);
+
+ return ret;
+}
+#undef GLUSTERD_DEFAULT_WORKDIR
+
int32_t
-cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options,
+ char **op_errstr)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int count = 0;
- char *key = NULL;
- char *value = NULL;
- int i = 0;
- char str[50] = {0,};
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int count = 0;
+ char *key = NULL;
+ char *value = NULL;
+ int i = 0;
+ char str[50] = {0,};
GF_ASSERT (words);
GF_ASSERT (options);
@@ -667,28 +823,65 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
if (ret)
goto out;
- if (!strcmp (volname, "help") && wordcount == 3 )
- ret = dict_set_str (dict, "help", volname);
-
- if (!strcmp (volname, "help-xml") && wordcount == 3 )
- ret = dict_set_str (dict, "help-xml", volname);
+ if ((!strcmp (volname, "help") || !strcmp (volname, "help-xml"))
+ && wordcount == 3 ) {
+ ret = dict_set_str (dict, volname, volname);
+ if (ret)
+ goto out;
- if (ret)
+ } else if (wordcount < 5) {
+ ret = -1;
goto out;
+ } else if (wordcount == 5 && cli_is_key_spl ((char *)words[3])) {
+ key = (char *) words[3];
+ value = (char *) words[4];
+ if ( !key || !value) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_strip_whitespace (value, strlen (value));
+ if (ret == -1)
+ goto out;
+
+ if (strlen (value) == 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_add_key_group (dict, key, value, op_errstr);
+ if (ret == 0)
+ *options = dict;
+ goto out;
+ }
for (i = 3; i < wordcount; i+=2) {
- key = (char *) words[i];
- value = (char *) words[i+1];
+ key = (char *) words[i];
+ value = (char *) words[i+1];
- if ( !key || !value) {
- ret = -1;
- goto out;
- }
+ if ( !key || !value) {
+ ret = -1;
+ goto out;
+ }
count++;
+ ret = gf_strip_whitespace (value, strlen (value));
+ if (ret == -1)
+ goto out;
+
+ if (strlen (value) == 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (cli_is_key_spl (key)) {
+ ret = -1;
+ goto out;
+ }
+
sprintf (str, "key%d", count);
ret = dict_set_str (dict, str, key);
if (ret)
@@ -709,10 +902,8 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
*options = dict;
out:
- if (ret) {
- if (dict)
- dict_destroy (dict);
- }
+ if (ret)
+ dict_destroy (dict);
return ret;
}
@@ -726,6 +917,13 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
int ret = -1;
int brick_count = 0, brick_index = 0;
char *bricks = NULL;
+ char *opwords_cl[] = { "replica", "stripe", NULL };
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int count = 1;
+ char *w = NULL;
+ int index;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -751,9 +949,65 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
ret = -1;
goto out;
}
+ if (wordcount < 6) {
+ /* no options seem to be given; go directly to parse_bricks */
+ brick_index = 3;
+ type = GF_CLUSTER_TYPE_NONE;
+ goto parse_bricks;
+ }
- brick_index = 3;
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
+ w = str_getunamb (words[3], opwords_cl);
+ if (!w) {
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+ } else if ((strcmp (w, "replica")) == 0) {
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol (words[4], NULL, 0);
+ if (!count || (count < 2)) {
+ cli_err ("replica count should be greater than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "replica-count", count);
+ if (ret)
+ goto out;
+ index = 5;
+ } else if ((strcmp (w, "stripe")) == 0) {
+ type = GF_CLUSTER_TYPE_STRIPE;
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol (words[4], NULL, 0);
+ if (!count || (count < 2)) {
+ cli_err ("stripe count should be greater than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "stripe-count", count);
+ if (ret)
+ goto out;
+ index = 5;
+ } else {
+ GF_ASSERT (!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
+
+ brick_index = index;
+
+parse_bricks:
+
+ if (strcmp (words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ ret = cli_cmd_bricks_parse (words, wc, brick_index, &bricks,
&brick_count);
if (ret)
goto out;
@@ -767,6 +1021,10 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
if (ret)
goto out;
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
*options = dict;
out:
@@ -794,20 +1052,21 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
int32_t j = 0;
char *tmp_brick = NULL;
char *tmp_brick1 = NULL;
- char *opwords[] = { "start", "commit", "pause", "abort", "status",
+ char *type_opword[] = { "replica", NULL };
+ char *opwords[] = { "start", "commit", "stop", "status",
"force", NULL };
char *w = NULL;
int32_t command = GF_OP_CMD_NONE;
+ long count = 0;
GF_ASSERT (words);
GF_ASSERT (options);
- dict = dict_new ();
-
- if (!dict)
+ if (wordcount < 4)
goto out;
- if (wordcount < 3)
+ dict = dict_new ();
+ if (!dict)
goto out;
volname = (char *)words[2];
@@ -818,6 +1077,29 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
if (ret)
goto out;
+ brick_index = 3;
+ w = str_getunamb (words[3], type_opword);
+ if (w && !strcmp ("replica", w)) {
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol (words[4], NULL, 0);
+ if (count < 1) {
+ cli_err ("replica count should be greater than 0 in "
+ "case of remove-brick");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "replica-count", count);
+ if (ret)
+ goto out;
+ brick_index = 5;
+ } else if (w) {
+ GF_ASSERT (!"opword mismatch");
+ }
+
w = str_getunamb (words[wordcount - 1], opwords);
if (!w) {
/* Should be default 'force' */
@@ -833,10 +1115,8 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
command = GF_OP_CMD_COMMIT;
if (question)
*question = 1;
- } else if (!strcmp ("pause", w)) {
- command = GF_OP_CMD_PAUSE;
- } else if (!strcmp ("abort", w)) {
- command = GF_OP_CMD_ABORT;
+ } else if (!strcmp ("stop", w)) {
+ command = GF_OP_CMD_STOP;
} else if (!strcmp ("status", w)) {
command = GF_OP_CMD_STATUS;
} else if (!strcmp ("force", w)) {
@@ -861,8 +1141,6 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
command);
- brick_index = 3;
-
tmp_index = brick_index;
tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
@@ -884,13 +1162,13 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
while (brick_index < wordcount) {
if (validate_brick_name ((char *)words[brick_index])) {
- cli_out ("wrong brick type: %s, use <HOSTNAME>:"
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
"<export-dir-abs-path>", words[brick_index]);
ret = -1;
goto out;
} else {
delimiter = strrchr(words[brick_index], ':');
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -902,7 +1180,7 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
if (!(strcmp (tmp_brick, tmp_brick1))) {
gf_log("",GF_LOG_ERROR, "Duplicate bricks"
" found %s", words[brick_index]);
- cli_out("Duplicate bricks found %s",
+ cli_err("Duplicate bricks found %s",
words[brick_index]);
ret = -1;
goto out;
@@ -929,10 +1207,8 @@ out:
dict_destroy (dict);
}
- if (tmp_brick)
- GF_FREE (tmp_brick);
- if (tmp_brick1)
- GF_FREE (tmp_brick1);
+ GF_FREE (tmp_brick);
+ GF_FREE (tmp_brick1);
return ret;
}
@@ -951,6 +1227,7 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
char *opwords[] = { "start", "commit", "pause", "abort", "status",
NULL };
char *w = NULL;
+ gf_boolean_t is_force = _gf_false;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -978,13 +1255,13 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
}
if (validate_brick_name ((char *)words[3])) {
- cli_out ("wrong brick type: %s, use "
+ cli_err ("wrong brick type: %s, use "
"<HOSTNAME>:<export-dir-abs-path>", words[3]);
ret = -1;
goto out;
} else {
delimiter = strrchr ((char *)words[3], ':');
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -999,13 +1276,13 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
}
if (validate_brick_name ((char *)words[4])) {
- cli_out ("wrong brick type: %s, use "
+ cli_err ("wrong brick type: %s, use "
"<HOSTNAME>:<export-dir-abs-path>", words[4]);
ret = -1;
goto out;
} else {
delimiter = strrchr ((char *)words[4], ':');
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -1039,6 +1316,7 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
GF_ASSERT (!"opword mismatch");
/* commit force option */
+
op_index = 6;
if (wordcount > (op_index + 1)) {
@@ -1047,8 +1325,17 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
}
if (wordcount == (op_index + 1)) {
+ if ((replace_op != GF_REPLACE_OP_COMMIT) &&
+ (replace_op != GF_REPLACE_OP_START)) {
+ ret = -1;
+ goto out;
+ }
if (!strcmp ("force", words[op_index])) {
- replace_op = GF_REPLACE_OP_COMMIT_FORCE;
+ if (replace_op == GF_REPLACE_OP_COMMIT)
+ replace_op = GF_REPLACE_OP_COMMIT_FORCE;
+
+ else if (replace_op == GF_REPLACE_OP_START)
+ is_force = _gf_true;
}
}
@@ -1062,14 +1349,15 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
if (ret)
goto out;
-
-
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
*options = dict;
out:
if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse remove-brick CLI");
+ gf_log ("cli", GF_LOG_ERROR, "Unable to parse replace-brick CLI");
if (dict)
dict_destroy (dict);
}
@@ -1105,12 +1393,12 @@ cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options)
delimiter = strchr (words[4], ':');
if (!delimiter || delimiter == words[4]
|| *(delimiter+1) != '/') {
- cli_out ("wrong brick type: %s, use <HOSTNAME>:"
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
"<export-dir-abs-path>", words[4]);
ret = -1;
goto out;
} else {
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -1160,8 +1448,8 @@ cli_cmd_log_level_parse (const char **words, int worcount, dict_t **options)
ret = glusterd_check_log_level(words[5]);
if (ret == -1) {
- cli_out("Invalid log level [%s] specified", words[5]);
- cli_out("Valid values for loglevel: (DEBUG|WARNING|ERROR"
+ cli_err("Invalid log level [%s] specified", words[5]);
+ cli_err("Valid values for loglevel: (DEBUG|WARNING|ERROR"
"|CRITICAL|NONE|TRACE)");
goto out;
}
@@ -1221,12 +1509,12 @@ cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options)
delimiter = strchr (words[4], ':');
if (!delimiter || delimiter == words[4]
|| *(delimiter+1) != '/') {
- cli_out ("wrong brick type: %s, use <HOSTNAME>:"
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
"<export-dir-abs-path>", words[4]);
ret = -1;
goto out;
} else {
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -1272,12 +1560,12 @@ cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options)
delimiter = strchr (words[4], ':');
if (!delimiter || delimiter == words[4]
|| *(delimiter+1) != '/') {
- cli_out ("wrong brick type: %s, use <HOSTNAME>:"
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
"<export-dir-abs-path>", words[4]);
ret = -1;
goto out;
} else {
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -1308,22 +1596,161 @@ gsyncd_glob_check (const char *w)
return !!strpbrk (w, "*?[");
}
+static int
+config_parse (const char **words, int wordcount, dict_t *dict,
+ unsigned cmdi, unsigned glob)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ char *append_str = NULL;
+ size_t append_len = 0;
+ char *subop = NULL;
+
+ switch ((wordcount - 1) - cmdi) {
+ case 0:
+ subop = gf_strdup ("get-all");
+ break;
+ case 1:
+ if (words[cmdi + 1][0] == '!') {
+ (words[cmdi + 1])++;
+ if (gf_asprintf (&subop, "del%s",
+ glob ? "-glob" : "") == -1)
+ subop = NULL;
+ } else
+ subop = gf_strdup ("get");
+
+ ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
+ break;
+ default:
+ if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
+
+ ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
+
+ /* join the varargs by spaces to get the op_value */
+
+ for (i = cmdi + 2; i < wordcount; i++)
+ append_len += (strlen (words[i]) + 1);
+ /* trailing strcat will add two bytes, make space for that */
+ append_len++;
+
+ append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = cmdi + 2; i < wordcount; i++) {
+ strcat (append_str, words[i]);
+ strcat (append_str, " ");
+ }
+ append_str[append_len - 2] = '\0';
+ /* "checkpoint now" is special: we resolve that "now" */
+ if (strcmp (words[cmdi + 1], "checkpoint") == 0 &&
+ strcmp (append_str, "now") == 0) {
+ struct timeval tv = {0,};
+
+ ret = gettimeofday (&tv, NULL);
+ if (ret == -1)
+ goto out; /* FIXME: free append_str? */
+
+ GF_FREE (append_str);
+ append_str = GF_CALLOC (1, 300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ strcpy (append_str, "as of ");
+ gf_time_fmt (append_str + strlen ("as of "),
+ 300 - strlen ("as of "),
+ tv.tv_sec, gf_timefmt_FT);
+ }
+
+ ret = dict_set_dynstr (dict, "op_value", append_str);
+ }
+
+ ret = -1;
+ if (subop) {
+ ret = dict_set_dynstr (dict, "subop", subop);
+ if (!ret)
+ subop = NULL;
+ }
+
+out:
+ if (ret && append_str)
+ GF_FREE (append_str);
+
+ GF_FREE (subop);
+
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
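/* Illustrative aside (not part of the patch): the default branch of
 * config_parse() above joins everything after the option name into one
 * space-separated op_value. A minimal standalone sketch of that join,
 * using plain libc calloc/strcat rather than GF_CALLOC: */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Join words[from..count-1] with single spaces; caller frees the result. */
static char *
join_words (const char **words, int from, int count)
{
        size_t len = 1;                         /* terminating NUL */
        char  *out = NULL;
        int    i   = 0;

        for (i = from; i < count; i++)
                len += strlen (words[i]) + 1;   /* word + separator */

        out = calloc (1, len);
        if (!out)
                return NULL;

        for (i = from; i < count; i++) {
                strcat (out, words[i]);
                if (i + 1 < count)
                        strcat (out, " ");
        }
        return out;
}

int
main (void)
{
        const char *words[] = { "config", "checkpoint", "now" };
        char       *val     = join_words (words, 1, 3);

        if (val) {
                printf ("op_value = \"%s\"\n", val); /* "checkpoint now" */
                free (val);
        }
        return 0;
}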
+static int32_t
+force_push_pem_parse (const char **words, int wordcount,
+ dict_t *dict, unsigned *cmdi)
+{
+ int32_t ret = 0;
+
+ if (!strcmp ((char *)words[wordcount-1], "force")) {
+ if ((strcmp ((char *)words[wordcount-2], "start")) &&
+ (strcmp ((char *)words[wordcount-2], "stop")) &&
+ (strcmp ((char *)words[wordcount-2], "create")) &&
+ (strcmp ((char *)words[wordcount-2], "push-pem"))) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "force",
+ _gf_true);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+
+ if (!strcmp ((char *)words[wordcount-2], "push-pem")) {
+ if (strcmp ((char *)words[wordcount-3], "create")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "push_pem", 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ }
+ } else if (!strcmp ((char *)words[wordcount-1], "push-pem")) {
+ if (strcmp ((char *)words[wordcount-2], "create")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "push_pem", 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ }
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
int32_t
cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
{
int32_t ret = -1;
dict_t *dict = NULL;
gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
- char *append_str = NULL;
- size_t append_len = 0;
- char *subop = NULL;
int i = 0;
unsigned masteri = 0;
unsigned slavei = 0;
unsigned glob = 0;
unsigned cmdi = 0;
- char *opwords[] = { "status", "start", "stop", "config",
- NULL };
+ char *opwords[] = { "create", "status", "start", "stop",
+ "config", "force", "delete",
+ "push-pem", "detail", NULL };
char *w = NULL;
GF_ASSERT (words);
@@ -1335,9 +1762,11 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
/* new syntax:
*
- * volume geo-replication [$m [$s]] status
+ * volume geo-replication $m $s create [push-pem] [force]
+ * volume geo-replication [$m [$s]] status [detail]
* volume geo-replication [$m] $s config [[!]$opt [$val]]
- * volume geo-replication $m $s start|stop
+ * volume geo-replication $m $s start|stop [force]
+ * volume geo-replication $m $s delete
*/
if (wordcount < 3)
@@ -1368,6 +1797,13 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (slavei == 3)
masteri = 2;
} else if (i <= 3) {
+ if (!strcmp ((char *)words[wordcount-1], "detail")) {
+ /* For status detail it is mandatory to provide
+ * both master and slave */
+ ret = -1;
+ goto out;
+ }
+
/* no $s, can only be status cmd
* (with either a single $m before it or nothing)
* -- these conditions imply that i <= 3 after
@@ -1394,7 +1830,12 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (!w)
goto out;
- if (strcmp (w, "status") == 0) {
+ if (strcmp (w, "create") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CREATE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp (w, "status") == 0) {
type = GF_GSYNC_OPTION_TYPE_STATUS;
if (slavei && !masteri)
@@ -1414,9 +1855,33 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (!masteri || !slavei)
goto out;
+ } else if (strcmp (w, "delete") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_DELETE;
+
+ if (!masteri || !slavei)
+ goto out;
} else
GF_ASSERT (!"opword mismatch");
+ ret = force_push_pem_parse (words, wordcount, dict, &cmdi);
+ if (ret)
+ goto out;
+
+ if (!strcmp ((char *)words[wordcount-1], "detail")) {
+ if (strcmp ((char *)words[wordcount-2], "status")) {
+ ret = -1;
+ goto out;
+ }
+ if (!slavei || !masteri) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "status-detail", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
if (type != GF_GSYNC_OPTION_TYPE_CONFIG &&
(cmdi < wordcount - 1 || glob))
goto out;
@@ -1425,72 +1890,27 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
ret = 0;
- if (masteri)
+ if (masteri) {
ret = dict_set_str (dict, "master", (char *)words[masteri]);
+ if (!ret)
+ ret = dict_set_str (dict, "volname",
+ (char *)words[masteri]);
+ }
if (!ret && slavei)
ret = dict_set_str (dict, "slave", (char *)words[slavei]);
if (!ret)
ret = dict_set_int32 (dict, "type", type);
- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
- switch ((wordcount - 1) - cmdi) {
- case 0:
- subop = gf_strdup ("get-all");
- break;
- case 1:
- if (words[cmdi + 1][0] == '!') {
- (words[cmdi + 1])++;
- if (gf_asprintf (&subop, "del%s", glob ? "-glob" : "") == -1)
- subop = NULL;
- } else
- subop = gf_strdup ("get");
-
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
- break;
- default:
- if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
- subop = NULL;
-
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
-
- /* join the varargs by spaces to get the op_value */
-
- for (i = cmdi + 2; i < wordcount; i++)
- append_len += (strlen (words[i]) + 1);
- /* trailing strcat will add two bytes, make space for that */
- append_len++;
-
- append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
- if (!append_str) {
- ret = -1;
- goto out;
- }
-
- for (i = cmdi + 2; i < wordcount; i++) {
- strcat (append_str, words[i]);
- strcat (append_str, " ");
- }
- append_str[append_len - 2] = '\0';
-
- ret = dict_set_dynstr (dict, "op_value", append_str);
- }
-
- if (!subop || dict_set_dynstr (dict, "subop", subop) != 0)
- ret = -1;
- }
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG)
+ ret = config_parse (words, wordcount, dict, cmdi, glob);
out:
if (ret) {
if (dict)
dict_destroy (dict);
- if (append_str)
- GF_FREE (append_str);
} else
*options = dict;
+
return ret;
}
@@ -1512,7 +1932,7 @@ cli_cmd_volume_profile_parse (const char **words, int wordcount,
if (!dict)
goto out;
- if (wordcount != 4)
+ if (wordcount < 4 || wordcount > 5)
goto out;
volname = (char *)words[2];
@@ -1534,7 +1954,19 @@ cli_cmd_volume_profile_parse (const char **words, int wordcount,
op = GF_CLI_STATS_INFO;
} else
GF_ASSERT (!"opword mismatch");
+
ret = dict_set_int32 (dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
+
+ if (wordcount == 5) {
+ if (!strcmp (words[4], "nfs")) {
+ ret = dict_set_int32 (dict, "nfs", _gf_true);
+ if (ret)
+ goto out;
+ }
+ }
+
*options = dict;
out:
if (ret && dict)
@@ -1556,12 +1988,13 @@ cli_cmd_volume_top_parse (const char **words, int wordcount,
int32_t list_cnt = -1;
int index = 0;
int perf = 0;
- int32_t blk_size = 0;
- int32_t count = 0;
+ uint32_t blk_size = 0;
+ uint32_t count = 0;
+ gf_boolean_t nfs = _gf_false;
char *delimiter = NULL;
char *opwords[] = { "open", "read", "write", "opendir",
"readdir", "read-perf", "write-perf",
- NULL };
+ "clear", NULL };
char *w = NULL;
GF_ASSERT (words);
@@ -1606,13 +2039,30 @@ cli_cmd_volume_top_parse (const char **words, int wordcount,
} else if (strcmp (w, "write-perf") == 0) {
top_op = GF_CLI_TOP_WRITE_PERF;
perf = 1;
+ } else if (strcmp (w, "clear") == 0) {
+ ret = dict_set_int32 (dict, "clear-stats", 1);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not set clear-stats in dict");
+ goto out;
+ }
} else
GF_ASSERT (!"opword mismatch");
ret = dict_set_int32 (dict, "top-op", (int32_t)top_op);
if (ret)
goto out;
- for (index = 4; index < wordcount; index+=2) {
+ if ((wordcount > 4) && !strcmp (words[4], "nfs")) {
+ nfs = _gf_true;
+ ret = dict_set_int32 (dict, "nfs", nfs);
+ if (ret)
+ goto out;
+ index = 5;
+ } else {
+ index = 4;
+ }
+
+ for (; index < wordcount; index+=2) {
key = (char *) words[index];
value = (char *) words[index+1];
@@ -1623,14 +2073,14 @@ cli_cmd_volume_top_parse (const char **words, int wordcount,
}
if (!strcmp (key, "brick")) {
delimiter = strchr (value, ':');
- if (!delimiter || delimiter == value
+ if (!delimiter || delimiter == value
|| *(delimiter+1) != '/') {
- cli_out ("wrong brick type: %s, use <HOSTNAME>:"
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
"<export-dir-abs-path>", value);
ret = -1;
goto out;
} else {
- ret = cli_canonicalize_path (delimiter + 1);
+ ret = gf_canonicalize_path (delimiter + 1);
if (ret)
goto out;
}
@@ -1641,39 +2091,40 @@ cli_cmd_volume_top_parse (const char **words, int wordcount,
if (!ret)
list_cnt = atoi (value);
if (ret || (list_cnt < 0) || (list_cnt > 100)) {
- cli_out ("list-cnt should be between 0 to 100");
+ cli_err ("list-cnt should be between 0 to 100");
ret = -1;
goto out;
}
- } else if (perf && !strcmp (key, "bs")) {
+ } else if (perf && !nfs && !strcmp (key, "bs")) {
ret = gf_is_str_int (value);
if (!ret)
blk_size = atoi (value);
if (ret || (blk_size <= 0)) {
if (blk_size < 0)
- cli_out ("block size is an invalid number");
+ cli_err ("block size is an invalid"
+ " number");
else
- cli_out ("block size should be an integer "
- "greater than zero");
+ cli_err ("block size should be an "
+ "integer greater than zero");
ret = -1;
goto out;
}
- ret = dict_set_int32 (dict, "blk-size", blk_size);
- } else if (perf && !strcmp (key, "count")) {
+ ret = dict_set_uint32 (dict, "blk-size", blk_size);
+ } else if (perf && !nfs && !strcmp (key, "count")) {
ret = gf_is_str_int (value);
if (!ret)
count = atoi(value);
if (ret || (count <= 0)) {
if (count < 0)
- cli_out ("count is an invalid number");
- else
- cli_out ("count should be an integer "
+ cli_err ("count is an invalid number");
+ else
+ cli_err ("count should be an integer "
"greater than zero");
ret = -1;
goto out;
}
- ret = dict_set_int32 (dict, "blk-cnt", count);
+ ret = dict_set_uint32 (dict, "blk-cnt", count);
} else {
ret = -1;
goto out;
@@ -1693,9 +2144,17 @@ cli_cmd_volume_top_parse (const char **words, int wordcount,
}
if ((blk_size > 0) ^ (count > 0)) {
+ cli_err ("Need to give both 'bs' and 'count'");
+ ret = -1;
+ goto out;
+ } else if (((uint64_t)blk_size * count) > (10 * GF_UNIT_GB)) {
+ cli_err ("'bs * count' value %"PRIu64" is greater than "
+ "maximum allowed value of 10GB",
+ ((uint64_t)blk_size * count));
ret = -1;
goto out;
}
+
*options = dict;
out:
if (ret && dict)
@@ -1703,23 +2162,178 @@ out:
return ret;
}
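/* Illustrative aside (not part of the patch): the bs/count validation in
 * cli_cmd_volume_top_parse() requires both values together and caps their
 * product at 10GB; promoting to 64 bits before multiplying avoids a 32-bit
 * overflow. A self-contained sketch, with a literal constant standing in
 * for 10 * GF_UNIT_GB: */

#include <stdio.h>
#include <stdint.h>

#define TEN_GB ((uint64_t)10 * 1024 * 1024 * 1024)

/* bs and count must be given together, and bs * count must stay <= 10GB. */
static int
validate_bs_count (uint32_t blk_size, uint32_t count)
{
        if ((blk_size > 0) ^ (count > 0))
                return -1;                      /* one without the other */
        if ((uint64_t)blk_size * count > TEN_GB)
                return -1;                      /* product too large */
        return 0;
}

int
main (void)
{
        printf ("%d\n", validate_bs_count (1024 * 1024, 4096)); /* 0: 4GB ok */
        printf ("%d\n", validate_bs_count (1024 * 1024, 0));    /* -1       */
        return 0;
}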
-int32_t
+uint32_t
+cli_cmd_get_statusop (const char *arg)
+{
+ int i = 0;
+ uint32_t ret = GF_CLI_STATUS_NONE;
+ char *w = NULL;
+ char *opwords[] = {"detail", "mem", "clients", "fd",
+ "inode", "callpool", "tasks", NULL};
+ struct {
+ char *opname;
+ uint32_t opcode;
+ } optable[] = {
+ { "detail", GF_CLI_STATUS_DETAIL },
+ { "mem", GF_CLI_STATUS_MEM },
+ { "clients", GF_CLI_STATUS_CLIENTS },
+ { "fd", GF_CLI_STATUS_FD },
+ { "inode", GF_CLI_STATUS_INODE },
+ { "callpool", GF_CLI_STATUS_CALLPOOL },
+ { "tasks", GF_CLI_STATUS_TASKS },
+ { NULL }
+ };
+
+ w = str_getunamb (arg, opwords);
+ if (!w) {
+ gf_log ("cli", GF_LOG_DEBUG,
+ "Not a status op %s", arg);
+ goto out;
+ }
+
+ for (i = 0; optable[i].opname; i++) {
+ if (!strcmp (w, optable[i].opname)) {
+ ret = optable[i].opcode;
+ break;
+ }
+ }
+
+ out:
+ return ret;
+}
+
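/* Illustrative aside (not part of the patch): cli_cmd_get_statusop() above
 * maps an option word to one bit of a status bitmask, which
 * cli_cmd_volume_status_parse() below then ORs with a target selector
 * (all/volume/brick/nfs/shd). A standalone sketch of that
 * table-plus-bitmask pattern, with made-up flag values instead of the real
 * GF_CLI_STATUS_* constants: */

#include <stdio.h>
#include <string.h>
#include <stdint.h>

enum {
        STATUS_NONE    = 0,
        STATUS_MEM     = 1 << 0,
        STATUS_CLIENTS = 1 << 1,
        STATUS_FD      = 1 << 2,
        STATUS_VOL     = 1 << 8,        /* target selectors in high bits */
        STATUS_BRICK   = 1 << 9,
};

static uint32_t
statusop_lookup (const char *arg)
{
        static const struct { const char *name; uint32_t code; } table[] = {
                { "mem",     STATUS_MEM     },
                { "clients", STATUS_CLIENTS },
                { "fd",      STATUS_FD      },
                { NULL,      STATUS_NONE    },
        };
        int i = 0;

        for (i = 0; table[i].name; i++)
                if (!strcmp (arg, table[i].name))
                        return table[i].code;
        return STATUS_NONE;
}

int
main (void)
{
        uint32_t cmd = statusop_lookup ("mem");

        cmd |= STATUS_VOL;              /* e.g. "volume status <vol> mem" */
        printf ("cmd = 0x%x\n", (unsigned)cmd);
        return 0;
}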
+int
cli_cmd_volume_status_parse (const char **words, int wordcount,
- dict_t **options)
+ dict_t **options)
{
- dict_t *dict = NULL;
- int ret = -1;
+ dict_t *dict = NULL;
+ int ret = -1;
+ uint32_t cmd = 0;
- GF_ASSERT (words);
GF_ASSERT (options);
dict = dict_new ();
if (!dict)
goto out;
- GF_ASSERT(words[2]);
+ switch (wordcount) {
+
+ case 2:
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
+ break;
+
+ case 3:
+ if (!strcmp (words[2], "all")) {
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
+
+ } else {
+ cmd = GF_CLI_STATUS_VOL;
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ }
+
+ break;
+
+ case 4:
+ cmd = cli_cmd_get_statusop (words[3]);
+
+ if (!strcmp (words[2], "all")) {
+ if (cmd == GF_CLI_STATUS_NONE) {
+ cli_err ("%s is not a valid status option",
+ words[3]);
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_ALL;
+ ret = 0;
+
+ } else {
+ ret = dict_set_str (dict, "volname",
+ (char *)words[2]);
+ if (ret)
+ goto out;
+
+ if (cmd == GF_CLI_STATUS_NONE) {
+ if (!strcmp (words[3], "nfs")) {
+ cmd |= GF_CLI_STATUS_NFS;
+ } else if (!strcmp (words[3], "shd")) {
+ cmd |= GF_CLI_STATUS_SHD;
+ } else {
+ cmd = GF_CLI_STATUS_BRICK;
+ ret = dict_set_str (dict, "brick",
+ (char *)words[3]);
+ }
+
+ } else {
+ cmd |= GF_CLI_STATUS_VOL;
+ ret = 0;
+ }
+ }
+
+ break;
+
+ case 5:
+ if (!strcmp (words[2], "all")) {
+ cli_err ("Cannot specify brick/nfs for \"all\"");
+ ret = -1;
+ goto out;
+ }
+
+ cmd = cli_cmd_get_statusop (words[4]);
+ if (cmd == GF_CLI_STATUS_NONE) {
+ cli_err ("%s is not a valid status option",
+ words[4]);
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ if (!strcmp (words[3], "nfs")) {
+ if (cmd == GF_CLI_STATUS_FD ||
+ cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Detail/FD/Tasks status not available"
+ " for NFS Servers");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_NFS;
+ } else if (!strcmp (words[3], "shd")){
+ if (cmd == GF_CLI_STATUS_FD ||
+ cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Detail/FD/Clients/Tasks status not "
+ "available for Self-heal Daemons");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_SHD;
+ } else {
+ if (cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Tasks status not available for "
+ "bricks");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_BRICK;
+ ret = dict_set_str (dict, "brick", (char *)words[3]);
+ }
+ break;
+
+ default:
+ goto out;
+ }
- ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "cmd", cmd);
if (ret)
goto out;
@@ -1733,18 +2347,20 @@ cli_cmd_volume_status_parse (const char **words, int wordcount,
}
gf_boolean_t
-cli_cmd_validate_dumpoption (const char *option)
+cli_cmd_validate_dumpoption (const char *arg, char **option)
{
- char *opwords[] = {"all", "mem", "iobuf", "callpool", "priv", "fd",
- "inode", NULL};
+ char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool", "priv",
+ "fd", "inode", "history", "inodectx", "fdctx",
+ NULL};
char *w = NULL;
- w = str_getunamb (option, opwords);
+ w = str_getunamb (arg, opwords);
if (!w) {
gf_log ("cli", GF_LOG_DEBUG, "Unknown statedump option %s",
- option);
+ arg);
return _gf_false;
}
+ *option = w;
return _gf_true;
}
@@ -1756,25 +2372,27 @@ cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
int i = 0;
dict_t *dict = NULL;
int option_cnt = 0;
+ char *option = NULL;
char option_str[100] = {0,};
for (i = 3; i < wordcount; i++, option_cnt++) {
- if (!cli_cmd_validate_dumpoption (words[i])) {
+ if (!cli_cmd_validate_dumpoption (words[i], &option)) {
ret = -1;
goto out;
}
- strncat (option_str, words[i], sizeof (words [i]));
+ strncat (option_str, option, strlen (option));
strncat (option_str, " ", 1);
}
+
dict = dict_new ();
if (!dict)
goto out;
- ret = dict_set_str (dict, "options", gf_strdup (option_str));
+ ret = dict_set_dynstr (dict, "options", gf_strdup (option_str));
if (ret)
goto out;
- ret = dict_set_int32 (dict, "option-cnt", option_cnt);
+ ret = dict_set_int32 (dict, "option_cnt", option_cnt);
if (ret)
goto out;
@@ -1786,3 +2404,1279 @@ out:
gf_log ("cli", GF_LOG_ERROR, "Error parsing dumpoptions");
return ret;
}
+
+int
+cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = -1;
+ int i = 0;
+ dict_t *dict = NULL;
+ char *kind_opts[4] = {"blocked", "granted", "all", NULL};
+ char *types[4] = {"inode", "entry", "posix", NULL};
+ char *free_ptr = NULL;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (strcmp (words[4], "kind"))
+ goto out;
+
+ for (i = 0; kind_opts[i]; i++) {
+ if (!strcmp (words[5], kind_opts[i])) {
+ free_ptr = gf_strdup (words[5]);
+ ret = dict_set_dynstr (dict, "kind", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ break;
+ }
+ }
+ if (i == 3)
+ goto out;
+
+ ret = -1;
+ for (i = 0; types[i]; i++) {
+ if (!strcmp (words[6], types[i])) {
+ free_ptr = gf_strdup (words[6]);
+ ret = dict_set_dynstr (dict, "type", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ break;
+ }
+ }
+ if (i == 3)
+ goto out;
+
+ if (wordcount == 8) {
+ free_ptr = gf_strdup (words[7]);
+ ret = dict_set_dynstr (dict, "opts", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ }
+
+ ret = 0;
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE (free_ptr);
+ dict_unref (dict);
+ }
+
+ return ret;
+}
+
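/* Illustrative aside (not part of the patch): cli_cmd_volume_clrlks_opts_parse()
 * above validates a word against a NULL-terminated list and detects "no match"
 * by checking whether the loop ran off the end (i == 3). A small standalone
 * helper expressing the same lookup: */

#include <stdio.h>
#include <string.h>

/* Return the index of arg in the NULL-terminated list, or -1 if absent. */
static int
word_index (const char *arg, const char *const *list)
{
        int i = 0;

        for (i = 0; list[i]; i++)
                if (!strcmp (arg, list[i]))
                        return i;
        return -1;
}

int
main (void)
{
        const char *const kinds[] = { "blocked", "granted", "all", NULL };

        printf ("%d\n", word_index ("granted", kinds)); /*  1 */
        printf ("%d\n", word_index ("bogus", kinds));   /* -1 */
        return 0;
}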
+static int
+extract_hostname_path_from_token (const char *tmp_words, char **hostname,
+ char **path)
+{
+ int ret = 0;
+ char *delimiter = NULL;
+ char *tmp_host = NULL;
+ char *host_name = NULL;
+ char *words = NULL;
+
+ *hostname = NULL;
+ *path = NULL;
+
+ words = GF_CALLOC (1, strlen (tmp_words) + 1, gf_common_mt_char);
+ if (!words){
+ ret = -1;
+ goto out;
+ }
+
+ strncpy (words, tmp_words, strlen (tmp_words) + 1);
+
+ if (validate_brick_name (words)) {
+ cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr (words, ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret) {
+ goto out;
+ } else {
+ *path = GF_CALLOC (1, strlen (delimiter+1) +1,
+ gf_common_mt_char);
+ if (!*path) {
+ ret = -1;
+ goto out;
+
+ }
+ strncpy (*path, delimiter +1,
+ strlen(delimiter + 1) + 1);
+ }
+ }
+
+ tmp_host = gf_strdup (words);
+ if (!tmp_host) {
+ gf_log ("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name (tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli",GF_LOG_ERROR, "Unable to allocate "
+ "memory");
+ goto out;
+ }
+ if (!(strcmp (host_name, "localhost") &&
+ strcmp (host_name, "127.0.0.1") &&
+ strncmp (host_name, "0.", 2))) {
+ cli_err ("Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address (host_name, _gf_false)) {
+ cli_err ("internet address '%s' does not conform to "
+ "standards", host_name);
+ ret = -1;
+ goto out;
+ }
+
+ *hostname = GF_CALLOC (1, strlen (host_name) + 1,
+ gf_common_mt_char);
+ if (!*hostname) {
+ ret = -1;
+ goto out;
+ }
+ strncpy (*hostname, host_name, strlen (host_name) + 1);
+ ret = 0;
+
+out:
+ GF_FREE (words);
+ GF_FREE (tmp_host);
+ return ret;
+}
+
+
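/* Illustrative aside (not part of the patch): extract_hostname_path_from_token()
 * above splits a brick token at the last ':' and insists on an absolute path
 * after it. A stripped-down standalone sketch of that split, without the
 * loopback and address checks: */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Split "HOST:/abs/path" into host and path; returns 0 on success. */
static int
split_brick (const char *token, char **host, char **path)
{
        const char *colon = strrchr (token, ':');
        size_t      hlen  = 0;

        if (!colon || colon == token || colon[1] != '/')
                return -1;              /* need a host and an absolute path */

        hlen  = (size_t)(colon - token);
        *host = malloc (hlen + 1);
        *path = malloc (strlen (colon + 1) + 1);
        if (!*host || !*path) {
                free (*host);
                free (*path);
                return -1;
        }
        memcpy (*host, token, hlen);
        (*host)[hlen] = '\0';
        strcpy (*path, colon + 1);
        return 0;
}

int
main (void)
{
        char *host = NULL;
        char *path = NULL;

        if (split_brick ("server1:/export/brick1", &host, &path) == 0)
                printf ("host=%s path=%s\n", host, path);
        free (host);
        free (path);
        return 0;
}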
+int
+cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, "volname", (char *) words[2]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname");
+ goto out;
+ }
+
+ if (wordcount == 3) {
+ ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX);
+ goto done;
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp (words[3], "full")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEAL_FULL);
+ goto done;
+ } else if (!strcmp (words[3], "statistics")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS);
+ goto done;
+
+ } else if (!strcmp (words[3], "info")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_INDEX_SUMMARY);
+ goto done;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (wordcount == 5) {
+ if (strcmp (words[3], "info") &&
+ strcmp (words[3], "statistics")) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!strcmp (words[3], "info")) {
+ if (!strcmp (words[4], "healed")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEALED_FILES);
+ goto done;
+ }
+ if (!strcmp (words[4], "heal-failed")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEAL_FAILED_FILES);
+ goto done;
+ }
+ if (!strcmp (words[4], "split-brain")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_SPLIT_BRAIN_FILES);
+ goto done;
+ }
+ }
+
+ if (!strcmp (words[3], "statistics")) {
+ if (!strcmp (words[4], "heal-count")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS_HEAL_COUNT);
+ goto done;
+ }
+ }
+ ret = -1;
+ goto out;
+ }
+ if (wordcount == 7) {
+ if (!strcmp (words[3], "statistics")
+ && !strcmp (words[4], "heal-count")
+ && !strcmp (words[5], "replica")) {
+
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
+ goto out;
+ ret = extract_hostname_path_from_token (words[6],
+ &hostname, &path);
+ if (ret)
+ goto out;
+ ret = dict_set_dynstr (dict, "per-replica-cmd-hostname",
+ hostname);
+ if (ret)
+ goto out;
+ ret = dict_set_dynstr (dict, "per-replica-cmd-path",
+ path);
+ if (ret)
+ goto out;
+ else
+ goto done;
+
+ }
+ }
+ ret = -1;
+ goto out;
+done:
+ *options = dict;
+out:
+ if (ret && dict) {
+ dict_unref (dict);
+ *options = NULL;
+ }
+
+ return ret;
+}
+
+int
+cli_cmd_volume_defrag_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *option = NULL;
+ char *volname = NULL;
+ char *command = NULL;
+ gf_cli_defrag_type cmd = 0;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (!((wordcount == 4) || (wordcount == 5)))
+ goto out;
+
+ if (wordcount == 4) {
+ if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
+ strcmp (words[3], "status"))
+ goto out;
+ } else {
+ if (strcmp (words[3], "fix-layout") &&
+ strcmp (words[3], "start"))
+ goto out;
+ }
+
+ volname = (char *) words[2];
+
+ if (wordcount == 4) {
+ command = (char *) words[3];
+ }
+ if (wordcount == 5) {
+ if ((strcmp (words[3], "fix-layout") ||
+ strcmp (words[4], "start")) &&
+ (strcmp (words[3], "start") ||
+ strcmp (words[4], "force"))) {
+ ret = -1;
+ goto out;
+ }
+ command = (char *) words[3];
+ option = (char *) words[4];
+ }
+
+ if (strcmp (command, "start") == 0) {
+ cmd = GF_DEFRAG_CMD_START;
+ if (option && strcmp (option, "force") == 0) {
+ cmd = GF_DEFRAG_CMD_START_FORCE;
+ }
+ goto done;
+ }
+
+ if (strcmp (command, "fix-layout") == 0) {
+ cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
+ goto done;
+ }
+ if (strcmp (command, "stop") == 0) {
+ cmd = GF_DEFRAG_CMD_STOP;
+ goto done;
+ }
+ if (strcmp (command, "status") == 0) {
+ cmd = GF_DEFRAG_CMD_STATUS;
+ }
+
+done:
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "rebalance-command", (int32_t) cmd);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
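/* Illustrative aside (not part of the patch): cli_cmd_volume_defrag_parse()
 * above folds the (command, option) pair into a single rebalance opcode. A
 * compact standalone sketch of that mapping, with placeholder enum values
 * instead of the real GF_DEFRAG_CMD_* constants: */

#include <stdio.h>
#include <string.h>

typedef enum {
        DEFRAG_NONE = 0,
        DEFRAG_START,
        DEFRAG_START_FORCE,
        DEFRAG_START_LAYOUT_FIX,
        DEFRAG_STOP,
        DEFRAG_STATUS,
} defrag_cmd_t;

static defrag_cmd_t
map_defrag_cmd (const char *command, const char *option)
{
        if (!strcmp (command, "start"))
                return (option && !strcmp (option, "force"))
                        ? DEFRAG_START_FORCE : DEFRAG_START;
        if (!strcmp (command, "fix-layout"))
                return DEFRAG_START_LAYOUT_FIX;
        if (!strcmp (command, "stop"))
                return DEFRAG_STOP;
        if (!strcmp (command, "status"))
                return DEFRAG_STATUS;
        return DEFRAG_NONE;
}

int
main (void)
{
        printf ("%d\n", map_defrag_cmd ("start", "force")); /* 2 */
        printf ("%d\n", map_defrag_cmd ("status", NULL));   /* 5 */
        return 0;
}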
+int32_t
+cli_snap_create_desc_parse (dict_t *dict, const char **words,
+ size_t wordcount, int32_t desc_opt_loc)
+{
+ int32_t ret = -1;
+ char *desc = NULL;
+ int32_t desc_len = 0;
+
+ desc = GF_CALLOC (MAX_SNAP_DESCRIPTION_LEN + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!desc) {
+ ret = -1;
+ goto out;
+ }
+
+
+ if (strlen (words[desc_opt_loc]) >= MAX_SNAP_DESCRIPTION_LEN) {
+ cli_out ("snapshot create: description truncated: "
+ "Description provided is longer than 1024 characters");
+ desc_len = MAX_SNAP_DESCRIPTION_LEN;
+ } else {
+ desc_len = strlen (words[desc_opt_loc]);
+ }
+
+ strncpy (desc, words[desc_opt_loc], desc_len);
+ desc[desc_len] = '\0';
+ /* Calculating the size of the description as given by the user */
+
+ ret = dict_set_dynstr (dict, "description", desc);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save snap "
+ "description");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && desc)
+ GF_FREE (desc);
+
+ return ret;
+}
+
+/* Function to check whether the Volume name is repeated */
+int
+cli_check_if_volname_repeated (const char **words, unsigned int start_index,
+ uint64_t cur_index) {
+ uint64_t i = -1;
+ int ret = 0;
+
+ GF_ASSERT (words);
+
+ for (i = start_index ; i < cur_index ; i++) {
+ if (strcmp (words[i], words[cur_index]) == 0) {
+ ret = -1;
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* snapshot create <snapname> <vol-name(s)> [description <description>]
+ * [force]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_create_parse (dict_t *dict, const char **words, int wordcount) {
+ uint64_t i = 0;
+ int ret = -1;
+ uint64_t volcount = 0;
+ char key[PATH_MAX] = "";
+ char *snapname = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot create)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount <= cmdi + 1) {
+ cli_err ("Invalid Syntax.");
+ gf_log ("cli", GF_LOG_ERROR,
+ "Too less words for snap create command");
+ goto out;
+ }
+
+ if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+ cli_err ("snapshot create: failed: snapname cannot exceed "
+ "255 characters.");
+ gf_log ("cli", GF_LOG_ERROR, "Snapname too long");
+
+ goto out;
+ }
+
+ snapname = (char *) words[cmdi];
+ for (i = 0 ; i < strlen (snapname); i++) {
+ /* Following volume name convention */
+ if (!isalnum (snapname[i]) && (snapname[i] != '_'
+ && (snapname[i] != '-'))) {
+ /* TODO : Is this message enough?? */
+ cli_err ("Snapname can contain only alphanumeric, "
+ "\"-\" and \"_\" characters");
+ goto out;
+ }
+ }
+
+ ret = dict_set_str (dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save snap "
+ "name");
+ goto out;
+ }
+
+ /* Filling volume name in the dictionary */
+ for (i = cmdi + 1 ; i < wordcount
+ && (strcmp (words[i], "description")) != 0
+ && (strcmp (words[i], "force") != 0); i++) {
+ volcount++;
+ /* volume index starts from 1 */
+ ret = snprintf (key, sizeof (key), "volname%"PRIu64, volcount);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_str (dict, key, (char *)words[i]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not "
+ "save volume name");
+ goto out;
+ }
+
+ if (i >= cmdi + 2) {
+ ret = -1;
+ cli_err("Creating multiple volume snapshot is not "
+ "supported as of now");
+ goto out;
+ }
+ /* TODO : remove the above condition check once
+ * snapshots of multiple volumes are supported */
+ }
+
+ if (volcount == 0) {
+ ret = -1;
+ cli_err ("Please provide the volume name");
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "volcount", volcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+
+ /* Verify how we got out of "for" loop,
+ * if it is by reaching wordcount limit then goto "out",
+ * because we need not parse for "description" and "force"
+ * after this.
+ */
+ if (i == wordcount) {
+ goto out;
+ }
+
+ if ((strcmp (words[i], "description")) == 0) {
+ ++i;
+ if (i > (wordcount - 1)) {
+ ret = -1;
+ cli_err ("Please provide a description");
+ gf_log ("cli", GF_LOG_ERROR,
+ "Description not provided");
+ goto out;
+ }
+
+ ret = cli_snap_create_desc_parse(dict, words, wordcount, i);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save snap "
+ "description");
+ goto out;
+ }
+
+ if (i == (wordcount - 1))
+ goto out;
+ i++;
+ /* point the index to next word.
+ * As description might be followed by force option.
+ * Before that, check if wordcount limit is reached
+ */
+ }
+
+ if ((strcmp (words[i], "force") != 0)) {
+ ret = -1;
+ cli_err ("Invalid Syntax.");
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ ret = dict_set_int8 (dict, "snap-force", 1);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save "
+ "snap force option");
+ goto out;
+ }
+
+ /* Check if the command has anything after "force" keyword */
+ if (++i < wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+
+out :
+ return ret;
+}
+
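/* Illustrative aside (not part of the patch): the snapname loop in
 * cli_snap_create_parse() above accepts only alphanumerics, '-' and '_'
 * (the volume-name convention). The same check as a standalone helper: */

#include <ctype.h>
#include <stdio.h>

/* Returns 1 if name contains only [A-Za-z0-9_-], else 0. */
static int
valid_snapname (const char *name)
{
        const unsigned char *p = (const unsigned char *)name;

        for (; *p; p++)
                if (!isalnum (*p) && *p != '_' && *p != '-')
                        return 0;
        return 1;
}

int
main (void)
{
        printf ("%d\n", valid_snapname ("snap_2014-01-31")); /* 1 */
        printf ("%d\n", valid_snapname ("bad name!"));       /* 0 */
        return 0;
}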
+/* snapshot list [volname]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_list_parse (dict_t *dict, const char **words, int wordcount) {
+ int ret = -1;
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount < 2 || wordcount > 3) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == 2) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to save volname in dictionary");
+ goto out;
+ }
+out :
+ return ret;
+}
+
+/* snapshot info [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_info_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+ int ret = -1;
+ int32_t cmd = GF_SNAP_INFO_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot info)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log ("", GF_LOG_ERROR, "Invalid syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* If 3rd word is not "volume", then it must
+ * be snapname.
+ */
+ if (strcmp (words[cmdi], "volume") != 0) {
+ ret = dict_set_str (dict, "snapname",
+ (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save "
+ "snapname %s", words[cmdi]);
+ goto out;
+ }
+
+ /* Once snap name is parsed, if we encounter any other
+ * word then fail it. Invalid Syntax.
+ * example : snapshot info <snapname> word
+ */
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ cmd = GF_SNAP_INFO_TYPE_SNAP;
+ ret = 0;
+ goto out;
+ /* No need to continue the parsing once we
+ * get the snapname
+ */
+ }
+
+ /* If 3rd word is "volume", then check if next word
+ * is present. As, "snapshot info volume" is an
+ * invalid command.
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", (char *)words[wordcount - 1]);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Count not save "
+ "volume name %s", words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_INFO_TYPE_VOL;
+out :
+ if (ret == 0) {
+ ret = dict_set_int32 (dict, "cmd", cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save "
+ "type of snapshot info");
+ }
+ }
+ return ret;
+}
+
+
+
+/* snapshot restore <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_restore_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+ int ret = -1;
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount != 3) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save snap-name %s",
+ words[2]);
+ goto out;
+ }
+out :
+ return ret;
+}
+
+/* snapshot delete <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ * 1 if user cancel the operation
+ */
+int
+cli_snap_delete_parse (dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state) {
+
+ int ret = -1;
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ question = "Deleting snap will erase all the information about "
+ "the snap. Do you still want to continue?";
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount != 3) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save snapname %s",
+ words[2]);
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log ("cli", GF_LOG_DEBUG, "User cancelled "
+ "snapshot delete operation");
+ goto out;
+ }
+out :
+ return ret;
+}
+
+/* snapshot status [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_status_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+ int ret = -1;
+ int32_t cmd = GF_SNAP_STATUS_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot status)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* if 3rd word is not "volume", then it must be "snapname"
+ */
+ if (strcmp (words[cmdi], "volume") != 0) {
+ ret = dict_set_str (dict, "snapname",
+ (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Count not save "
+ "snap name %s", words[cmdi]);
+ goto out;
+ }
+
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+ cmd = GF_SNAP_STATUS_TYPE_SNAP;
+ goto out;
+ }
+
+ /* If 3rd word is "volume", then check if next word is present.
+ * As, "snapshot info volume" is an invalid command
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", (char *)words [wordcount - 1]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Count not save "
+ "volume name %s", words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_STATUS_TYPE_VOL;
+
+out :
+ if (ret == 0) {
+ ret = dict_set_int32 (dict, "cmd", cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save cmd "
+ "of snapshot status");
+ }
+ }
+ return ret;
+}
+
+
+int32_t
+cli_snap_config_limit_parse (const char **words, dict_t *dict,
+ unsigned int wordcount, unsigned int index,
+ char *key)
+{
+ int ret = -1;
+ int limit = 0;
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+ GF_ASSERT (key);
+
+ if (index >= wordcount) {
+ ret = -1;
+ cli_err ("Please provide a value for %s.",key);
+ gf_log ("cli", GF_LOG_ERROR, "Value not provided for %s", key);
+ goto out;
+ }
+
+ limit = strtol (words[index], NULL, 0);
+ if (limit <= 0) {
+ ret = -1;
+ cli_err ("%s should be greater than 0.", key);
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, key, limit);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not set "
+ "%s in dictionary", key);
+ goto out;
+ }
+
+out :
+ return ret;
+}
+
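/* Illustrative aside (not part of the patch): cli_snap_config_limit_parse()
 * above rejects any value <= 0 but relies on strtol's defaults for trailing
 * junk. A hedged sketch of a stricter standalone variant using the end
 * pointer and errno: */

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a strictly positive 32-bit limit; returns -1 on any error. */
static int
parse_limit (const char *arg, int *limit)
{
        char *end = NULL;
        long  val = 0;

        errno = 0;
        val = strtol (arg, &end, 0);
        if (errno || end == arg || *end != '\0' || val <= 0 || val > INT_MAX)
                return -1;
        *limit = (int)val;
        return 0;
}

int
main (void)
{
        int limit = 0;

        if (parse_limit ("256", &limit) == 0)
                printf ("snap-max-hard-limit = %d\n", limit);
        return 0;
}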
+/* function cli_snap_config_parse
+ * Config Syntax : gluster snapshot config [volname]
+ * [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <count>]
+ *
+ return value: <0 on failure
+ 1 if user cancels the operation
+ 0 on success
+
+ NOTE : snap-max-soft-limit can only be set system-wide, not per volume.
+*/
+int32_t
+cli_snap_config_parse (const char **words, int wordcount, dict_t *dict,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ gf_boolean_t vol_presence = _gf_false;
+ struct snap_config_opt_vals_ *conf_vals = NULL;
+ int8_t hard_limit = 0;
+ int8_t soft_limit = 0;
+ int8_t config_type = -1;
+ const char *question = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot config)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+ GF_ASSERT (state);
+
+ if ((wordcount < 2) || (wordcount > 7)) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Invalid wordcount(%d)", wordcount);
+ goto out;
+ }
+
+ if (wordcount == 2) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+
+ /* Check whether the 3rd word is volname */
+ if (strcmp (words[cmdi], "snap-max-hard-limit") != 0
+ && strcmp (words[cmdi], "snap-max-soft-limit") != 0) {
+ ret = dict_set_str (dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set volname");
+ goto out;
+ }
+ cmdi++;
+ vol_presence = _gf_true;
+
+ if (cmdi == wordcount) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+ }
+
+ config_type = GF_SNAP_CONFIG_TYPE_SET;
+
+ if (strcmp (words[cmdi], "snap-max-hard-limit") == 0) {
+ ret = cli_snap_config_limit_parse (words, dict, wordcount,
+ ++cmdi, "snap-max-hard-limit");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse snap "
+ "config hard limit");
+ goto out;
+ }
+ hard_limit = 1;
+
+ if (++cmdi == wordcount) {
+ ret = 0;
+ goto set;
+ }
+ }
+
+ if (strcmp (words[cmdi], "snap-max-soft-limit") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err ("Soft limit cannot be set to individual "
+ "volumes.");
+ gf_log ("cli", GF_LOG_ERROR, "Soft limit cannot be "
+ "set to volumes");
+ goto out;
+ }
+
+ ret = cli_snap_config_limit_parse (words, dict, wordcount,
+ ++cmdi, "snap-max-soft-limit");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse snap "
+ "config soft limit");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ soft_limit = 1;
+ } else {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ ret = 0; /* Success */
+
+set:
+ ret = dict_set_int32 (dict, "config-command", config_type);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to set "
+ "config-command");
+ goto out;
+ }
+
+ if (config_type == GF_SNAP_CONFIG_TYPE_SET) {
+ conf_vals = snap_confopt_vals;
+ if (hard_limit && soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_BOTH].question;
+ } else if (soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_SOFT].question;
+ } else if (hard_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_HARD].question;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log ("cli", GF_LOG_DEBUG, "User cancelled "
+ "snapshot config operation");
+ }
+ }
+
+out:
+ return ret;
+}
+
+int
+validate_snapname (const char *snapname, char **opwords) {
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (snapname);
+ GF_ASSERT (opwords);
+
+ for (i = 0 ; opwords[i] != NULL; i++) {
+ if (strcmp (opwords[i], snapname) == 0) {
+ cli_out ("\"%s\" cannot be a snapname", snapname);
+ goto out;
+ }
+ }
+ ret = 0;
+out :
+ return ret;
+}
+
+int32_t
+cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
+ struct cli_state *state)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_snapshot type = GF_SNAP_OPTION_TYPE_NONE;
+ char *w = NULL;
+ char *opwords[] = {"create", "delete", "restore", "start",
+ "stop", "list", "status", "config",
+ "info", NULL};
+ char *invalid_snapnames[] = {"description", "force",
+ "volume", NULL};
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+ GF_ASSERT (state);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ /* Lowest wordcount possible */
+ if (wordcount < 2) {
+ gf_log ("", GF_LOG_ERROR,
+ "Invalid command: Not enough arguments");
+ goto out;
+ }
+
+ w = str_getunamb (words[1], opwords);
+ if (!w) {
+ /* Checks if the operation is a valid operation */
+ gf_log ("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ }
+
+ if (!strcmp (w, "create")) {
+ type = GF_SNAP_OPTION_TYPE_CREATE;
+ } else if (!strcmp (w, "list")) {
+ type = GF_SNAP_OPTION_TYPE_LIST;
+ } else if (!strcmp (w, "info")) {
+ type = GF_SNAP_OPTION_TYPE_INFO;
+ } else if (!strcmp (w, "delete")) {
+ type = GF_SNAP_OPTION_TYPE_DELETE;
+ } else if (!strcmp (w, "config")) {
+ type = GF_SNAP_OPTION_TYPE_CONFIG;
+ } else if (!strcmp (w, "restore")) {
+ type = GF_SNAP_OPTION_TYPE_RESTORE;
+ } else if (!strcmp (w, "status")) {
+ type = GF_SNAP_OPTION_TYPE_STATUS;
+ }
+
+ if (type != GF_SNAP_OPTION_TYPE_CONFIG) {
+ ret = dict_set_int32 (dict, "hold_snap_locks", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to set hold-snap-locks value "
+ "as _gf_true");
+ goto out;
+ }
+ }
+
+ /* Check which op is intended */
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ {
+ /* Syntax :
+ * gluster snapshot create <snapname> <vol-name(s)>
+ * [description <description>]
+ * [force]
+ */
+
+ /* If the snapname is not given, parsing fails.
+ * The snapname also cannot be "description",
+ * "force" or "volume"; that check is made here.
+ */
+ if (wordcount == 2) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR,
+ "Invalid Syntax");
+ goto out;
+ }
+
+ ret = validate_snapname (words[2], invalid_snapnames);
+ if (ret) {
+ goto out;
+ }
+
+ ret = cli_snap_create_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "create command parsing failed.");
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_OPTION_TYPE_INFO:
+ {
+ /* Syntax :
+ * gluster snapshot info [(snapname | volume <volname>)]
+ */
+ ret = cli_snap_info_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot info command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ {
+ /* Syntax :
+ * gluster snapshot list [volname]
+ */
+
+ ret = cli_snap_list_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot list command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ {
+ /* Syntax :
+ * gluster snapshot delete <snapname>
+ */
+ ret = cli_snap_delete_parse (dict, words, wordcount,
+ state);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot delete command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ {
+ /* snapshot config [volname] [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <percent>] */
+ ret = cli_snap_config_parse (words, wordcount, dict,
+ state);
+ if (ret) {
+ if (ret < 0)
+ gf_log ("cli", GF_LOG_ERROR,
+ "config command parsing failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "type",
+ GF_SNAP_OPTION_TYPE_CONFIG);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to set "
+ "config type");
+ ret = -1;
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ {
+ /* Syntax :
+ * gluster snapshot status [(snapname |
+ * volume <volname>)]
+ */
+ ret = cli_snap_status_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot status command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ {
+ /* Syntax:
+ * snapshot restore <snapname>
+ */
+ ret = cli_snap_restore_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "restore command");
+ goto out;
+ }
+ break;
+ }
+ default:
+ gf_log ("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ break;
+ }
+
+ ret = dict_set_int32 (dict, "type", type);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to set type.");
+ goto out;
+ }
+ /* If you got so far, input is valid */
+ ret = 0;
+out:
+ if (ret) {
+ if (dict)
+ dict_destroy (dict);
+ } else
+ *options = dict;
+
+ return ret;
+}
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
index 3b41195a1..551312411 100644
--- a/cli/src/cli-cmd-peer.c
+++ b/cli/src/cli-cmd-peer.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -31,6 +21,7 @@
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
+#include "cli1-xdr.h"
#include "protocol-common.h"
extern struct rpc_clnt *global_rpc;
@@ -50,6 +41,7 @@ cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *dict = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
if (!(wordcount == 3)) {
cli_usage_out (word->pattern);
@@ -71,12 +63,14 @@ cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (ret)
goto out;
- ret = valid_internet_address ((char *) words[2]);
+ ret = valid_internet_address ((char *) words[2], _gf_false);
if (ret == 1) {
ret = 0;
} else {
+ cli_out ("%s is an invalid address", words[2]);
cli_usage_out (word->pattern);
parse_error = 1;
+ ret = -1;
goto out;
}
/* if (words[3]) {
@@ -85,6 +79,9 @@ cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
*/
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
@@ -95,6 +92,9 @@ out:
if ((sent == 0) && (parse_error == 0))
cli_out ("Peer probe failed");
}
+
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -107,10 +107,12 @@ cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *dict = NULL;
+ int flags = 0;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
- if (!(wordcount == 3) ) {
+ if ((wordcount < 3) || (wordcount > 4)) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
@@ -134,6 +136,22 @@ cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
*/
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3]))
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ else {
+ ret = -1;
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ }
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
@@ -145,6 +163,8 @@ out:
cli_out ("Peer detach failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -171,7 +191,7 @@ cli_cmd_peer_status_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
if (proc->fn) {
- ret = proc->fn (frame, THIS, (char *)words[1] );
+ ret = proc->fn (frame, THIS, (void *)GF_CLI_LIST_PEERS);
}
out:
@@ -180,6 +200,48 @@ out:
if ((sent == 0) && (parse_error == 0))
cli_out ("Peer status failed");
}
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_pool_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ if (wordcount != 2) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS,
+ (void *)GF_CLI_LIST_POOL_NODES);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_err ("pool list: command execution failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -188,7 +250,7 @@ struct cli_cmd cli_probe_cmds[] = {
cli_cmd_peer_probe_cbk,
"probe peer specified by <HOSTNAME>"},
- { "peer detach <HOSTNAME>",
+ { "peer detach <HOSTNAME> [force]",
cli_cmd_peer_deprobe_cbk,
"detach peer specified by <HOSTNAME>"},
@@ -200,6 +262,10 @@ struct cli_cmd cli_probe_cmds[] = {
cli_cmd_peer_help_cbk,
"Help command for peer "},
+ { "pool list",
+ cli_cmd_pool_list_cbk,
+ "list all the nodes in the pool (including localhost)"},
+
{ NULL, NULL, NULL }
};
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
new file mode 100644
index 000000000..de492d683
--- /dev/null
+++ b/cli/src/cli-cmd-snapshot.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+int
+cli_cmd_snapshot_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_snapshot_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable [GLUSTER_CLI_SNAP];
+ if (proc == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Parses the command entered by the user */
+ ret = cli_cmd_snapshot_parse (words, wordcount, &options, state);
+ if (ret) {
+ if (ret < 0) {
+ cli_usage_out (word->pattern);
+ parse_err = 1;
+ }
+ else {
+ /* User might have cancelled the snapshot operation */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn)
+ ret = proc->fn (frame, THIS, options);
+
+out:
+ if (ret && parse_err == 0)
+ cli_out ("Snapshot command failed");
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+struct cli_cmd snapshot_cmds[] = {
+ { "snapshot help",
+ cli_cmd_snapshot_help_cbk,
+ "display help for snapshot commands"
+ },
+ { "snapshot create <snapname> <volname(s)> [description <description>] [force]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Create."
+ },
+ { "snapshot restore <snapname>",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Restore."
+ },
+ { "snapshot status [(snapname | volume <volname>)]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Status."
+ },
+ { "snapshot info [(snapname | volume <volname>)]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Info."
+ },
+ { "snapshot list [volname]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot List."
+ },
+ {"snapshot config [volname] [snap-max-hard-limit <count>] [snap-max-soft-limit <percent>]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Config."
+ },
+ {"snapshot delete <snapname>",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Delete."
+ },
+ { NULL, NULL, NULL }
+};
+
+int
+cli_cmd_snapshot_help_cbk (struct cli_state *state,
+ struct cli_cmd_word *in_word,
+ const char **words,
+ int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = snapshot_cmds; cmd->pattern; cmd++)
+ if (_gf_false == cmd->disable)
+ cli_out ("%s - %s", cmd->pattern, cmd->desc);
+
+ return 0;
+}
+
+int
+cli_cmd_snapshot_register (struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = snapshot_cmds; cmd->pattern; cmd++) {
+
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
index 25938b897..8cfa5e70c 100644
--- a/cli/src/cli-cmd-system.c
+++ b/cli/src/cli-cmd-system.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -41,6 +31,12 @@ extern rpc_clnt_prog_t *cli_rpc_prog;
int cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
+int cli_cmd_copy_file_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+int cli_cmd_sys_exec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
int
cli_cmd_getspec_cbk (struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
@@ -288,6 +284,114 @@ cli_cmd_umount_cbk (struct cli_state *state, struct cli_cmd_word *word,
return ret;
}
+int
+cli_cmd_uuid_get_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_GET];
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ if (proc->fn)
+ ret = proc->fn (frame, this, dict);
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("uuid get failed");
+ }
+
+ if (dict)
+ dict_unref (dict);
+
+ CLI_STACK_DESTROY (frame);
+ return ret;
+}
+
+int
+cli_cmd_uuid_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ char *question = NULL;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ question = "Resetting uuid changes the uuid of local glusterd. "
+ "Do you want to continue?";
+
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_RESET];
+
+ this = THIS;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ answer = cli_cmd_get_confirmation (state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ /* glusterd needs no arguments for uuid reset; an empty dict is passed */
+ if (proc->fn) {
+ ret = proc->fn (frame, this, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("uuid reset failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
struct cli_cmd cli_system_cmds[] = {
{ "system:: getspec <VOLID>",
cli_cmd_getspec_cbk,
@@ -313,14 +417,163 @@ struct cli_cmd cli_system_cmds[] = {
cli_cmd_umount_cbk,
"request an umount"},
+ { "system:: uuid get",
+ cli_cmd_uuid_get_cbk,
+ "get uuid of glusterd"},
+
+ { "system:: uuid reset",
+ cli_cmd_uuid_reset_cbk,
+ "reset the uuid of glusterd"},
+
{ "system:: help",
cli_cmd_system_help_cbk,
"display help for system commands"},
+ { "system:: copy file [<filename>]",
+ cli_cmd_copy_file_cbk,
+ "Copy file from current node's $working_dir to "
+ "$working_dir of all cluster nodes"},
+
+ { "system:: execute <command> <args>",
+ cli_cmd_sys_exec_cbk,
+ "Execute the command on all the nodes "
+ "in the cluster and display their output."},
+
{ NULL, NULL, NULL }
};
int
+cli_cmd_sys_exec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ char cmd_arg_name[PATH_MAX] = "";
+ char *command = NULL;
+ char *saveptr = NULL;
+ char *tmp = NULL;
+ int ret = -1;
+ int i = -1;
+ int cmd_args_count = 0;
+ int in_cmd_args_count = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 3) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
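+ /* words[2] may itself contain several space-separated tokens: the
+ first token is the command, the rest become cmd_arg_<n> entries.
+ The remaining words (words[3] onward) are appended below with the
+ same numbering, so glusterd receives one flat argument list. */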
+ command = strtok_r ((char *)words[2], " ", &saveptr);
+ do {
+ tmp = strtok_r (NULL, " ", &saveptr);
+ if (tmp) {
+ in_cmd_args_count++;
+ memset (cmd_arg_name, '\0', sizeof(cmd_arg_name));
+ snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+ "cmd_arg_%d", in_cmd_args_count);
+ ret = dict_set_str (dict, cmd_arg_name, tmp);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set "
+ "%s in dict", cmd_arg_name);
+ goto out;
+ }
+ }
+ } while (tmp);
+
+ cmd_args_count = wordcount - 3;
+
+ ret = dict_set_str (dict, "command", command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set command in dict");
+ goto out;
+ }
+
+ for (i=1; i <= cmd_args_count; i++) {
+ in_cmd_args_count++;
+ memset (cmd_arg_name, '\0', sizeof(cmd_arg_name));
+ snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+ "cmd_arg_%d", in_cmd_args_count);
+ ret = dict_set_str (dict, cmd_arg_name,
+ (char *)words[2+i]);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set %s in dict",
+ cmd_arg_name);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (dict, "cmd_args_count", in_cmd_args_count);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set cmd_args_count in dict");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", "N/A");
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set volname in dict");
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYS_EXEC];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ ret = proc->fn (frame, THIS, (void*)dict);
+ }
+out:
+ return ret;
+}
+
+int
+cli_cmd_copy_file_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *filename = "";
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ filename = (char*)words[3];
+ ret = dict_set_str (dict, "source", filename);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Unable to set filename in dict");
+
+ ret = dict_set_str (dict, "volname", "N/A");
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Unable to set volname in dict");
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_COPY_FILE];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ ret = proc->fn (frame, THIS, (void*)dict);
+ }
+out:
+ return ret;
+}
+
+int
cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 9b85bf819..100be0b73 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -88,9 +78,9 @@ cli_cmd_volume_info_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!local)
goto out;
- local->u.get_vol.flags = ctx.flags;
+ local->get_vol.flags = ctx.flags;
if (ctx.volname)
- local->u.get_vol.volname = gf_strdup (ctx.volname);
+ local->get_vol.volname = gf_strdup (ctx.volname);
frame->local = local;
@@ -105,6 +95,8 @@ out:
cli_out ("Getting Volume information failed!");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -116,9 +108,15 @@ cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
int ret = -1;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
- gf1_cli_sync_volume_req req = {0,};
int sent = 0;
int parse_error = 0;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = "Sync volume may make data "
+ "inaccessible while the sync "
+ "is in progress. Do you want "
+ "to continue?";
if ((wordcount < 3) || (wordcount > 4)) {
cli_usage_out (word->pattern);
@@ -126,14 +124,40 @@ cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
if ((wordcount == 3) || !strcmp(words[3], "all")) {
- req.flags = GF_CLI_SYNC_ALL;
- req.volname = "";
+ ret = dict_set_int32 (dict, "flags", (int32_t)
+ GF_CLI_SYNC_ALL);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set"
+ "flag");
+ goto out;
+ }
} else {
- req.volname = (char *)words[3];
+ ret = dict_set_str (dict, "volname", (char *) words[3]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set "
+ "volume");
+ goto out;
+ }
}
- req.hostname = (char *)words[2];
+ ret = dict_set_str (dict, "hostname", (char *) words[2]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set hostname");
+ goto out;
+ }
+
+ if (!(state->mode & GLUSTER_MODE_SCRIPT)) {
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
@@ -141,8 +165,10 @@ cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
- ret = proc->fn (frame, THIS, &req);
+ ret = proc->fn (frame, THIS, dict);
}
out:
@@ -152,6 +178,8 @@ out:
cli_out ("Volume sync failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -292,13 +320,11 @@ found_bad_brick_order:
out:
ai_list_tmp2 = NULL;
i = 0;
- if (brick_list_dup)
- GF_FREE (brick_list_dup);
+ GF_FREE (brick_list_dup);
list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
if (ai_list_tmp1->info)
freeaddrinfo (ai_list_tmp1->info);
- if (ai_list_tmp2)
- free (ai_list_tmp2);
+ free (ai_list_tmp2);
ai_list_tmp2 = ai_list_tmp1;
}
free (ai_list_tmp2);
@@ -319,7 +345,7 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
int32_t brick_count = 0;
int32_t sub_count = 0;
int32_t type = GF_CLUSTER_TYPE_NONE;
-
+ cli_local_t *local = NULL;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
@@ -365,19 +391,31 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
}
+
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume create failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -394,6 +432,8 @@ cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
const char *question = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
question = "Deleting volume will erase all information about the volume. "
"Do you want to continue?";
@@ -403,6 +443,10 @@ cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
if (wordcount != 3) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -418,8 +462,17 @@ cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
volname = (char *)words[2];
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "dict set failed");
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
- ret = proc->fn (frame, THIS, volname);
+ ret = proc->fn (frame, THIS, dict);
}
out:
@@ -429,6 +482,8 @@ out:
cli_out ("Volume delete failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -439,9 +494,11 @@ cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
int ret = -1;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
- gf1_cli_start_vol_req req = {0,};
int sent = 0;
int parse_error = 0;
+ dict_t *dict = NULL;
+ int flags = 0;
+ cli_local_t *local = NULL;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
@@ -453,13 +510,23 @@ cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
- req.volname = (char *)words[2];
- if (!req.volname)
+ dict = dict_new ();
+ if (!dict) {
+ goto out;
+ }
+
+ if (!words[2])
goto out;
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
if (wordcount == 4) {
if (!strcmp("force", words[3])) {
- req.flags |= GF_CLI_FLAG_OP_FORCE;
+ flags |= GF_CLI_FLAG_OP_FORCE;
} else {
ret = -1;
cli_usage_out (word->pattern);
@@ -467,11 +534,25 @@ cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
}
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "dict set failed");
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to serialize dict");
+ goto out;
+ }
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
- ret = proc->fn (frame, THIS, &req);
+ ret = proc->fn (frame, THIS, dict);
}
out:
@@ -481,6 +562,8 @@ out:
cli_out ("Volume start failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -489,7 +572,7 @@ cli_cmd_get_confirmation (struct cli_state *state, const char *question)
{
char answer[5] = {'\0', };
char flush = '\0';
- int len = 0;
+ size_t len;
if (state->mode & GLUSTER_MODE_SCRIPT)
return GF_ANSWER_YES;
@@ -503,7 +586,7 @@ cli_cmd_get_confirmation (struct cli_state *state, const char *question)
len = strlen (answer);
- if (answer [len - 1] == '\n'){
+ if (len && answer [len - 1] == '\n'){
answer [--len] = '\0';
} else {
do{
@@ -534,10 +617,12 @@ cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
int flags = 0;
- gf1_cli_stop_vol_req req = {0,};
gf_answer_t answer = GF_ANSWER_NO;
int sent = 0;
int parse_error = 0;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ cli_local_t *local = NULL;
const char *question = "Stopping volume will make its data inaccessible. "
"Do you want to continue?";
@@ -552,9 +637,14 @@ cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
- req.volname = (char *)words[2];
- if (!req.volname)
+ volname = (char*) words[2];
+
+ dict = dict_new ();
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
goto out;
+ }
if (wordcount == 4) {
if (!strcmp("force", words[3])) {
@@ -566,6 +656,12 @@ cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
}
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "dict set failed");
+ goto out;
+ }
answer = cli_cmd_get_confirmation (state, question);
@@ -574,20 +670,23 @@ cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
- req.flags = flags;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
- ret = proc->fn (frame, THIS, &req);
+ ret = proc->fn (frame, THIS, dict);
}
out:
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume stop on '%s' failed", req.volname);
+ cli_out ("Volume stop on '%s' failed", volname);
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -644,6 +743,8 @@ out:
cli_out ("Volume rename on '%s' failed", (char *)words[2]);
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -657,7 +758,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *dict = NULL;
int sent = 0;
int parse_error = 0;
- int index = 0;
+ cli_local_t *local = NULL;
#ifdef GF_SOLARIS_HOST_OS
cli_out ("Command not supported on Solaris");
goto out;
@@ -667,86 +768,30 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = cli_cmd_volume_defrag_parse (words, wordcount, &dict);
- if (!((wordcount == 4) || (wordcount == 5) || (wordcount == 6))) {
+ if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
- goto out;
- }
-
- if (wordcount == 4) {
- index = 3;
- } else {
- if (strcmp (words[3], "fix-layout") &&
- strcmp (words[3], "migrate-data")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- index = 4;
- }
-
- if (strcmp (words[index], "start") && strcmp (words[index], "stop") &&
- strcmp (words[index], "status")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
-
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- if (ret)
- goto out;
-
- if (wordcount == 4) {
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
- }
- if (wordcount == 5) {
- ret = dict_set_str (dict, "start-type", (char *)words[3]);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "command", (char *)words[4]);
- if (ret)
- goto out;
- }
-
- /* 'force' option is valid only for the 'migrate-data' key */
- if (wordcount == 6) {
- if (strcmp (words[3], "migrate-data") ||
- strcmp (words[4], "start") ||
- strcmp (words[5], "force")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- ret = dict_set_str (dict, "start-type", "migrate-data-force");
- if (ret)
- goto out;
- ret = dict_set_str (dict, "command", (char *)words[4]);
- if (ret)
- goto out;
}
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
out:
- if (dict)
- dict_destroy (dict);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume rebalance failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -756,11 +801,11 @@ cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
{
int sent = 0;
int parse_error = 0;
-
int ret = -1;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *options = NULL;
+ cli_local_t *local = NULL;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
@@ -769,27 +814,27 @@ cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
ret = cli_cmd_volume_reset_parse (words, wordcount, &options);
-
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume reset failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -805,6 +850,7 @@ cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *options = NULL;
+ cli_local_t *local = NULL;
ret = cli_cmd_volume_profile_parse (words, wordcount, &options);
@@ -820,20 +866,21 @@ cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume profile failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -849,6 +896,8 @@ cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
@@ -856,28 +905,33 @@ cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
- ret = cli_cmd_volume_set_parse (words, wordcount, &options);
-
+ ret = cli_cmd_volume_set_parse (words, wordcount, &options, &op_errstr);
if (ret) {
- cli_usage_out (word->pattern);
+ if (op_errstr) {
+ cli_err ("%s", op_errstr);
+ GF_FREE (op_errstr);
+ } else
+ cli_usage_out (word->pattern);
+
parse_error = 1;
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume set failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -893,35 +947,63 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+
+ const char *question = "Changing the 'stripe count' of the volume is "
+ "not a supported feature. In some cases it may result in data "
+ "loss on the volume. Also there may be issues with regular "
+ "filesystem operations on the volume after the change. Do you "
+ "really want to continue with 'stripe' count option ? ";
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
goto out;
ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
-
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
}
+ /* TODO: there are challenges in supporting a change of
+ stripe-count; until it is properly supported, warn the user */
+ if (dict_get (options, "stripe-count")) {
+ answer = cli_cmd_get_confirmation (state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
+
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume add-brick failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -937,6 +1019,7 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
call_frame_t *frame = NULL;
dict_t *options = NULL;
gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
const char *question = "Disabling quota will delete all the quota "
"configuration. Do you want to continue?";
@@ -953,6 +1036,7 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
}
ret = cli_cmd_quota_parse (words, wordcount, &options);
+
if (ret < 0) {
cli_usage_out (word->pattern);
parse_err = 1;
@@ -964,16 +1048,17 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn)
ret = proc->fn (frame, THIS, options);
out:
- if (options)
- dict_unref (options);
-
if (ret && parse_err == 0)
cli_out ("Quota command failed");
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -991,6 +1076,7 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
int sent = 0;
int parse_error = 0;
int need_question = 0;
+ cli_local_t *local = NULL;
const char *question = "Removing brick(s) can result in data loss. "
"Do you want to Continue?";
@@ -1001,7 +1087,6 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options,
&need_question);
-
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -1019,6 +1104,8 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
@@ -1030,8 +1117,8 @@ out:
cli_out ("Volume remove-brick failed");
}
- if (options)
- dict_unref (options);
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -1048,6 +1135,7 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
#ifdef GF_SOLARIS_HOST_OS
cli_out ("Command not supported on Solaris");
@@ -1067,20 +1155,30 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
goto out;
}
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force"
+ "option");
+ goto out;
+ }
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume replace-brick failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -1095,50 +1193,6 @@ cli_cmd_volume_set_transport_cbk (struct cli_state *state,
}
int
-cli_cmd_log_filename_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (!((wordcount == 5) || (wordcount == 6))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_FILENAME];
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- ret = cli_cmd_log_filename_parse (words, wordcount, &options);
- if (ret)
- goto out;
-
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
-
-out:
- if (options)
- dict_destroy (options);
-
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume log filename failed");
- }
-
- return ret;
-}
-
-int
cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
{
@@ -1149,6 +1203,7 @@ cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
ret = cli_cmd_volume_top_parse (words, wordcount, &options);
@@ -1164,67 +1219,25 @@ cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume top failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
-int
-cli_cmd_log_locate_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (!((wordcount == 4) || (wordcount == 5))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_LOCATE];
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- ret = cli_cmd_log_locate_parse (words, wordcount, &options);
- if (ret)
- goto out;
-
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
-
-out:
- if (options)
- dict_destroy (options);
-
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("getting log file location information failed");
- }
-
- return ret;
-}
int
cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
@@ -1236,6 +1249,7 @@ cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
if (!((wordcount == 4) || (wordcount == 5))) {
cli_usage_out (word->pattern);
@@ -1253,19 +1267,19 @@ cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (ret)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_destroy (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume log rotate failed");
}
+ CLI_STACK_DESTROY (frame);
return ret;
}
@@ -1339,6 +1353,7 @@ cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word
dict_t *options = NULL;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
proc = &cli_rpc_prog->proctable [GLUSTER_CLI_GSYNC_SET];
if (proc == NULL) {
@@ -1359,109 +1374,249 @@ cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn)
ret = proc->fn (frame, THIS, options);
out:
- if (options)
- dict_unref (options);
-
if (ret && parse_err == 0)
cli_out (GEOREP" command failed");
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
int
-cli_cmd_log_level_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_status_cbk (struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
int ret = -1;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *dict = NULL;
+ uint32_t cmd = 0;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_status_parse (words, wordcount, &dict);
+
+ if (ret) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
- if (wordcount != 6) {
- cli_usage_out (word->pattern);
- goto out;
+ if (!(cmd & GF_CLI_STATUS_ALL)) {
+ /* for one volume or brick */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
+ } else {
+ /* volume status all or all detail */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_ALL];
}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_LEVEL];
+ if (!proc->fn)
+ goto out;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
- goto out;
+ goto out;
- ret = cli_cmd_log_level_parse (words, wordcount, &dict);
- if (ret)
- goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
- if (proc->fn)
- ret = proc->fn (frame, THIS, dict);
+ ret = proc->fn (frame, THIS, dict);
+
+out:
+ CLI_STACK_DESTROY (frame);
- out:
return ret;
}
+
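+ /* Pulls the per-brick "detail" fields (brick<i>.free, brick<i>.total,
+ block size, filesystem, inode counts, ...) out of the status dict
+ into the cli_volume_status_t structure for printing. */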
int
-cli_cmd_volume_status_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ uint64_t free = 0;
+ uint64_t total = 0;
+ char key[1024] = {0};
+ int ret = 0;
- if (wordcount != 3) {
- cli_usage_out (word->pattern);
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free", i);
+ ret = dict_get_uint64 (dict, key, &free);
+
+ status->free = gf_uint64_2human_readable (free);
+ if (!status->free)
goto out;
- }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.total", i);
+ ret = dict_get_uint64 (dict, key, &total);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
+ status->total = gf_uint64_2human_readable (total);
+ if (!status->total)
goto out;
- ret = cli_cmd_volume_status_parse (words, wordcount, &dict);
+#ifdef GF_LINUX_HOST_OS
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.device", i);
+ ret = dict_get_str (dict, key, &(status->device));
if (ret)
- goto out;
+ status->device = NULL;
+#endif
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.block_size", i);
+ ret = dict_get_uint64 (dict, key, &(status->block_size));
+ if (ret) {
+ ret = 0;
+ status->block_size = 0;
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mnt_options", i);
+ ret = dict_get_str (dict, key, &(status->mount_options));
+ if (ret)
+ status->mount_options = NULL;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.fs_name", i);
+ ret = dict_get_str (dict, key, &(status->fs_name));
+ if (ret) {
+ ret = 0;
+ status->fs_name = NULL;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.inode_size", i);
+ ret = dict_get_str (dict, key, &(status->inode_size));
+ if (ret)
+ status->inode_size = NULL;
+#endif /* GF_LINUX_HOST_OS */
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.total_inodes", i);
+ ret = dict_get_uint64 (dict, key,
+ &(status->total_inodes));
+ if (ret)
+ status->total_inodes = 0;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free_inodes", i);
+ ret = dict_get_uint64 (dict, key, &(status->free_inodes));
+ if (ret) {
+ ret = 0;
+ status->free_inodes = 0;
+ }
- if (proc->fn)
- ret = proc->fn (frame, THIS, dict);
out:
return ret;
}
+void
+cli_print_detailed_status (cli_volume_status_t *status)
+{
+ cli_out ("%-20s : %-20s", "Brick", status->brick);
+ if (status->online)
+ cli_out ("%-20s : %-20d", "Port", status->port);
+ else
+ cli_out ("%-20s : %-20s", "Port", "N/A");
+ cli_out ("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
+ cli_out ("%-20s : %-20s", "Pid", status->pid_str);
+
+#ifdef GF_LINUX_HOST_OS
+ if (status->fs_name)
+ cli_out ("%-20s : %-20s", "File System", status->fs_name);
+ else
+ cli_out ("%-20s : %-20s", "File System", "N/A");
+
+ if (status->device)
+ cli_out ("%-20s : %-20s", "Device", status->device);
+ else
+ cli_out ("%-20s : %-20s", "Device", "N/A");
+
+ if (status->mount_options) {
+ cli_out ("%-20s : %-20s", "Mount Options",
+ status->mount_options);
+ } else {
+ cli_out ("%-20s : %-20s", "Mount Options", "N/A");
+ }
+
+ if (status->inode_size) {
+ cli_out ("%-20s : %-20s", "Inode Size",
+ status->inode_size);
+ } else {
+ cli_out ("%-20s : %-20s", "Inode Size", "N/A");
+ }
+#endif
+ if (status->free)
+ cli_out ("%-20s : %-20s", "Disk Space Free", status->free);
+ else
+ cli_out ("%-20s : %-20s", "Disk Space Free", "N/A");
+
+ if (status->total)
+ cli_out ("%-20s : %-20s", "Total Disk Space", status->total);
+ else
+ cli_out ("%-20s : %-20s", "Total Disk Space", "N/A");
+
+
+ if (status->total_inodes) {
+ cli_out ("%-20s : %-20ld", "Inode Count",
+ status->total_inodes);
+ } else {
+ cli_out ("%-20s : %-20s", "Inode Count", "N/A");
+ }
+
+ if (status->free_inodes) {
+ cli_out ("%-20s : %-20ld", "Free Inodes",
+ status->free_inodes);
+ } else {
+ cli_out ("%-20s : %-20s", "Free Inodes", "N/A");
+ }
+}
int
-cli_print_brick_status (char *brick, int port, int online, int pid)
+cli_print_brick_status (cli_volume_status_t *status)
{
int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
- char buf[80] = {0,};
int bricklen = 0;
- int i = 0;
char *p = NULL;
int num_tabs = 0;
- bricklen = strlen (brick);
- p = brick;
+ p = status->brick;
+ bricklen = strlen (p);
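+ /* Brick paths longer than the column width are wrapped across
+ lines; port/online/pid are printed on the line that carries the
+ final fragment of the path. */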
while (bricklen > 0) {
if (bricklen > fieldlen) {
- i++;
- strncpy (buf, p, fieldlen);
- buf[strlen(buf) + 1] = '\0';
- cli_out ("%s", buf);
- p = brick + i * fieldlen;
+ cli_out ("%.*s", fieldlen, p);
+ p += fieldlen;
bricklen -= fieldlen;
} else {
num_tabs = (fieldlen - bricklen) / CLI_TAB_LENGTH + 1;
printf ("%s", p);
while (num_tabs-- != 0)
printf ("\t");
- cli_out ("%d\t%c\t%d", port, online?'Y':'N', pid);
+ if (status->port) {
+ if (status->online)
+ cli_out ("%d\t%c\t%s",
+ status->port,
+ status->online?'Y':'N',
+ status->pid_str);
+ else
+ cli_out ("%s\t%c\t%s",
+ "N/A",
+ status->online?'Y':'N',
+ status->pid_str);
+ }
+ else
+ cli_out ("%s\t%c\t%s",
+ "N/A", status->online?'Y':'N',
+ status->pid_str);
bricklen = 0;
}
}
@@ -1476,28 +1631,36 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
int ret = -1;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
- gf1_cli_heal_vol_req req = {0,};
int sent = 0;
int parse_error = 0;
+ dict_t *options = NULL;
+ xlator_t *this = NULL;
+ cli_local_t *local = NULL;
- frame = create_frame (THIS, THIS->ctx->pool);
+ this = THIS;
+ frame = create_frame (this, this->ctx->pool);
if (!frame)
goto out;
- if (wordcount != 3) {
+ if (wordcount < 3) {
cli_usage_out (word->pattern);
- parse_error = 1;
+ parse_error = 1;
goto out;
}
- req.volname = (char *)words[2];
- if (!req.volname)
+ ret = cli_cmd_volume_heal_options_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
goto out;
+ }
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
- ret = proc->fn (frame, THIS, &req);
+ ret = proc->fn (frame, THIS, options);
}
out:
@@ -1507,6 +1670,8 @@ out:
cli_out ("Volume heal failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
@@ -1520,6 +1685,7 @@ cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
@@ -1531,7 +1697,7 @@ cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word
goto out;
}
- if (wordcount > 3) {
+ if (wordcount >= 3) {
ret = cli_cmd_volume_statedump_options_parse (words, wordcount,
&options);
if (ret) {
@@ -1541,19 +1707,6 @@ cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word
cli_out ("Error parsing options");
cli_usage_out (word->pattern);
}
- } else {
- options = dict_new ();
- if (!options) {
- ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Could not create dict");
- goto out;
- }
- ret = dict_set_str (options, "options","");
- if (ret)
- goto out;
- ret = dict_set_int32 (options, "option-cnt", 0);
- if (ret)
- goto out;
}
ret = dict_set_str (options, "volname", (char *)words[2]);
@@ -1561,6 +1714,9 @@ cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word
goto out;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
@@ -1572,16 +1728,113 @@ out:
cli_out ("Volume statedump failed");
}
+ CLI_STACK_DESTROY (frame);
+
return ret;
}
+int
+cli_cmd_volume_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ int sent = 0;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, NULL);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if (sent == 0)
+ cli_out ("Volume list failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_clearlocks_cbk (struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount < 7 || wordcount > 8) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_clrlks_opts_parse (words, wordcount, &options);
+ if (ret) {
+ parse_error = 1;
+ gf_log ("cli", GF_LOG_ERROR, "Error parsing "
+ "clear-locks options");
+ cli_out ("Error parsing options");
+ cli_usage_out (word->pattern);
+ }
+
+ ret = dict_set_str (options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "path", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume clear-locks failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
struct cli_cmd volume_cmds[] = {
{ "volume info [all|<VOLNAME>]",
cli_cmd_volume_info_cbk,
"list information of all volumes"},
- { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ...",
+ { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] "
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
+#ifdef HAVE_BD_XLATOR
+ "?<vg_name>"
+#endif
+ "... [force]",
+
cli_cmd_volume_create_cbk,
"create a new volume of specified type with mentioned bricks"},
@@ -1601,19 +1854,19 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_rename_cbk,
"rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
- { "volume add-brick <VOLNAME> <NEW-BRICK> ...",
+ { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... [force]",
cli_cmd_volume_add_brick_cbk,
"add brick to volume <VOLNAME>"},
- { "volume remove-brick <VOLNAME> <BRICK> ... {start|pause|abort|status|commit|force}",
+ { "volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ... [start|stop|status|commit|force]",
cli_cmd_volume_remove_brick_cbk,
"remove brick from volume <VOLNAME>"},
- { "volume rebalance <VOLNAME> [fix-layout|migrate-data] {start|stop|status} [force]",
+ { "volume rebalance <VOLNAME> [fix-layout] {start|stop|status} [force]",
cli_cmd_volume_defrag_cbk,
"rebalance operations"},
- { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start|pause|abort|status|commit}",
+ { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start [force]|pause|abort|status|commit [force]}",
cli_cmd_volume_replace_brick_cbk,
"replace-brick operations"},
@@ -1629,14 +1882,6 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_help_cbk,
"display help for the volume command"},
- { "volume log filename <VOLNAME> [BRICK] <PATH>",
- cli_cmd_log_filename_cbk,
- "set the log file for corresponding volume/brick"},
-
- { "volume log locate <VOLNAME> [BRICK]",
- cli_cmd_log_locate_cbk,
- "locate the log file for corresponding volume/brick"},
-
{ "volume log rotate <VOLNAME> [BRICK]",
cli_cmd_log_rotate_cbk,
"rotate the log file for corresponding volume/brick"},
@@ -1650,13 +1895,14 @@ struct cli_cmd volume_cmds[] = {
"reset all the reconfigured options"},
#if (SYNCDAEMON_COMPILE)
- {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {start|stop|config|status} [options...]",
+ {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {create [push-pem] [force]"
+ "|start [force]|stop [force]|config|status [detail]|delete} [options...]",
cli_cmd_volume_gsync_set_cbk,
"Geo-sync operations",
cli_cmd_check_gsync_exists_cbk},
#endif
- { "volume profile <VOLNAME> {start|info|stop}",
+ { "volume profile <VOLNAME> {start|stop|info [nfs]}",
cli_cmd_volume_profile_cbk,
"volume profile operations"},
@@ -1664,28 +1910,35 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_quota_cbk,
"quota translator specific operations"},
- { "volume top <VOLNAME> {[open|read|write|opendir|readdir] "
- "|[read-perf|write-perf bs <size> count <count>]} "
- " [brick <brick>] [list-cnt <count>]",
+ { "volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] |\n"
+ "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>]",
cli_cmd_volume_top_cbk,
"volume top operations"},
- {"volume log level <VOLNAME> <XLATOR[*]> <LOGLEVEL>",
- cli_cmd_log_level_cbk,
- "log level for translator"},
-
- { "volume status <VOLNAME>",
+ { "volume status [all | <VOLNAME> [nfs|shd|<BRICK>]]"
+ " [detail|clients|mem|inode|fd|callpool|tasks]",
cli_cmd_volume_status_cbk,
- "display status of specified volume"},
+ "display status of all or specified volume(s)/brick"},
- { "volume heal <VOLNAME>",
+ { "volume heal <VOLNAME> [{full | statistics {heal-count {replica <hostname:brickname>}} |info {healed | heal-failed | split-brain}}]",
cli_cmd_volume_heal_cbk,
- "Start healing of volume specified by <VOLNAME>"},
+ "self-heal commands on volume specified by <VOLNAME>"},
- {"volume statedump <VOLNAME> [all|mem|iobuf|callpool|priv|fd|inode]...",
+ {"volume statedump <VOLNAME> [nfs] [all|mem|iobuf|callpool|priv|fd|"
+ "inode|history]...",
cli_cmd_volume_statedump_cbk,
"perform statedump on bricks"},
+ {"volume list",
+ cli_cmd_volume_list_cbk,
+ "list all volumes in cluster"},
+
+ {"volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}"
+ "{inode [range]|entry [basename]|posix [range]}",
+ cli_cmd_volume_clearlocks_cbk,
+ "Clear locks held on path"
+ },
+
{ NULL, NULL, NULL }
};
diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
index 17869eb61..b81f75b5b 100644
--- a/cli/src/cli-cmd.c
+++ b/cli/src/cli-cmd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -60,6 +50,9 @@ cli_cmd_needs_connection (struct cli_cmd_word *word)
if (!strcasecmp ("getwd", word->word))
return 1;
+ if (!strcasecmp ("exit", word->word))
+ return 0;
+
return CLI_DEFAULT_CONN_TIMEOUT;
}
@@ -208,8 +201,7 @@ cli_cmd_process_line (struct cli_state *state, const char *text)
ret = cli_cmd_process (state, count, tokens);
out:
- if (copy)
- free (copy);
+ free (copy);
if (tokens)
cli_cmd_tokens_destroy (tokens);
@@ -239,6 +231,9 @@ cli_cmds_register (struct cli_state *state)
if (ret)
goto out;
+ ret = cli_cmd_snapshot_register (state);
+ if (ret)
+ goto out;
out:
return ret;
}
@@ -297,8 +292,6 @@ cli_cmd_await_response (unsigned time)
cmd_done = 0;
- cli_cmd_unlock ();
-
if (ret)
return ret;
@@ -370,8 +363,11 @@ cli_cmd_submit (void *req, call_frame_t *frame,
int ret = -1;
unsigned timeout = 0;
- timeout = (GLUSTER_CLI_PROFILE_VOLUME == procnum) ?
- CLI_TOP_CMD_TIMEOUT : CLI_DEFAULT_CMD_TIMEOUT;
+ if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
+ (GLUSTER_CLI_HEAL_VOLUME == procnum))
+ timeout = CLI_TEN_MINUTES_TIMEOUT;
+ else
+ timeout = CLI_DEFAULT_CMD_TIMEOUT;
cli_cmd_lock ();
cmd_sent = 0;
@@ -381,8 +377,9 @@ cli_cmd_submit (void *req, call_frame_t *frame,
if (!ret) {
cmd_sent = 1;
ret = cli_cmd_await_response (timeout);
- } else
- cli_cmd_unlock ();
+ }
+
+ cli_cmd_unlock ();
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h
index 82e0ff113..041729276 100644
--- a/cli/src/cli-cmd.h
+++ b/cli/src/cli-cmd.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_CMD_H__
#define __CLI_CMD_H__
@@ -30,6 +20,32 @@
#include "cli.h"
#include "list.h"
+#define CLI_LOCAL_INIT(local, words, frame, dictionary) \
+ do { \
+ local = cli_local_get (); \
+ \
+ if (local) { \
+ local->words = words; \
+ if (dictionary) \
+ local->dict = dictionary; \
+ if (frame) \
+ frame->local = local; \
+ } \
+ } while (0)
+
+#define CLI_STACK_DESTROY(_frame) \
+ do { \
+ if (_frame) { \
+ if (_frame->local) { \
+ gf_log ("cli", GF_LOG_DEBUG, "frame->local " \
+ "is not NULL (%p)", _frame->local); \
+ cli_local_wipe (_frame->local); \
+ _frame->local = NULL; \
+ } \
+ STACK_DESTROY (_frame->root); \
+ } \
+ } while (0)
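+
+ /* Typical callback usage (illustrative sketch, mirroring the callbacks
+ * in cli-cmd-volume.c):
+ *
+ * CLI_LOCAL_INIT (local, words, frame, options);
+ * if (proc->fn)
+ * ret = proc->fn (frame, THIS, options);
+ * out:
+ * CLI_STACK_DESTROY (frame);
+ */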
+
typedef enum {
GF_ANSWER_YES = 1,
GF_ANSWER_NO = 2
@@ -77,6 +93,8 @@ int cli_cmd_probe_register (struct cli_state *state);
int cli_cmd_system_register (struct cli_state *state);
+int cli_cmd_snapshot_register (struct cli_state *state);
+
int cli_cmd_misc_register (struct cli_state *state);
struct cli_cmd_word *cli_cmd_nextword (struct cli_cmd_word *word,
@@ -102,4 +120,5 @@ cli_cmd_submit (void *req, call_frame_t *frame,
gf_answer_t
cli_cmd_get_confirmation (struct cli_state *state, const char *question);
int cli_cmd_sent_status_get (int *status);
+
#endif /* __CLI_CMD_H__ */
diff --git a/cli/src/cli-mem-types.h b/cli/src/cli-mem-types.h
index 3c49d2183..09fcb639b 100644
--- a/cli/src/cli-mem-types.h
+++ b/cli/src/cli-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_MEM_TYPES_H__
#define __CLI_MEM_TYPES_H__
diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c
index 80b14620f..ade1c8ebb 100644
--- a/cli/src/cli-rl.c
+++ b/cli/src/cli-rl.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -48,7 +38,6 @@ cli_rl_out (struct cli_state *state, const char *fmt, va_list ap)
{
int tmp_rl_point = rl_point;
int n = rl_end;
- int i = 0;
int ret = 0;
if (rl_end >= 0 ) {
@@ -56,12 +45,7 @@ cli_rl_out (struct cli_state *state, const char *fmt, va_list ap)
rl_redisplay ();
}
- printf ("\r");
-
- for (i = 0; i <= strlen (state->prompt); i++)
- printf (" ");
-
- printf ("\r");
+ printf ("\r%*s\r", (int)strlen (state->prompt), "");
ret = vprintf (fmt, ap);
@@ -77,6 +61,34 @@ cli_rl_out (struct cli_state *state, const char *fmt, va_list ap)
return ret;
}
+int
+cli_rl_err (struct cli_state *state, const char *fmt, va_list ap)
+{
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
+
+ if (rl_end >= 0 ) {
+ rl_kill_text (0, rl_end);
+ rl_redisplay ();
+ }
+
+ fprintf (stderr, "\r%*s\r", (int)strlen (state->prompt), "");
+
+ ret = vfprintf (stderr, fmt, ap);
+
+ fprintf (stderr, "\n");
+ fflush(stderr);
+
+ if (n) {
+ rl_do_undo ();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state ();
+ }
+
+ return ret;
+}
+
void
cli_rl_process_line (char *line)
@@ -89,9 +101,14 @@ cli_rl_process_line (char *line)
state->rl_processing = 1;
{
ret = cli_cmd_process_line (state, line);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to process line");
+
add_history (line);
}
state->rl_processing = 0;
+
}
@@ -187,7 +204,7 @@ cli_rl_tokenize (const char *text)
}
if (i < count) {
- /* symoblize that what needs to be autocompleted is
+ /* symbolize that what needs to be autocompleted is
the full set of possible nextwords, and not extend
the last word
*/
@@ -199,8 +216,7 @@ cli_rl_tokenize (const char *text)
}
out:
- if (copy)
- free (copy);
+ free (copy);
if (i < count) {
cli_cmd_tokens_destroy (tokens);
@@ -347,9 +363,10 @@ cli_rl_input (void *_data)
for (;;) {
line = readline (state->prompt);
if (!line)
- break;
+ exit (0);
- cli_rl_process_line (line);
+ if (*line)
+ cli_rl_process_line (line);
free (line);
}
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 1e77ae0ab..bfeb854ad 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -1,32 +1,28 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#ifndef GSYNC_CONF
-#define GSYNC_CONF GEOREP"/gsyncd.conf"
-#endif
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+/* Widths of various columns in top read/write-perf output
+ * Total width of top read/write-perf should be 80 chars
+ * including one space between columns
+ */
+#define VOL_TOP_PERF_FILENAME_DEF_WIDTH 47
+#define VOL_TOP_PERF_FILENAME_ALT_WIDTH 44
+#define VOL_TOP_PERF_SPEED_WIDTH 4
+#define VOL_TOP_PERF_TIME_WIDTH 26
+
+#define INDENT_MAIN_HEAD "%-25s %s "
#include "cli.h"
#include "compat-errno.h"
@@ -50,25 +46,39 @@ extern rpc_clnt_prog_t *cli_rpc_prog;
extern int cli_op_ret;
extern int connected;
-char *cli_volume_type[] = {"Distribute",
- "Stripe",
- "Replicate",
- "Striped-Replicate (RAID 01)",
- "Distributed-Stripe",
- "Distributed-Replicate",
- "Distributed-Striped-Replicate (RAID 01)",
-};
-
-
-char *cli_volume_status[] = {"Created",
- "Started",
- "Stopped"
+char *cli_vol_type_str[] = {"Distribute",
+ "Stripe",
+ "Replicate",
+ "Striped-Replicate",
+ "Distributed-Stripe",
+ "Distributed-Replicate",
+ "Distributed-Striped-Replicate",
+ };
+
+char *cli_vol_status_str[] = {"Created",
+ "Started",
+ "Stopped",
+ };
+
+char *cli_vol_task_status_str[] = {"not started",
+ "in progress",
+ "stopped",
+ "completed",
+ "failed",
+ "fix-layout in progress",
+ "fix-layout stopped",
+ "fix-layout completed",
+ "fix-layout failed",
};
int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
void *data);
+int
+cli_to_glusterd (gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref);
rpc_clnt_prog_t cli_handshake_prog = {
.progname = "cli handshake",
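[Editor's note] The renamed string tables (cli_vol_type_str, cli_vol_status_str) and the new cli_vol_task_status_str are indexed directly by the numeric type/status codes returned by glusterd, which is why their ordering must match the corresponding enums. A small standalone sketch of that lookup pattern, with an invented enum and a bounds check, follows.

    #include <stdio.h>

    /* Invented example enum; in the CLI the indices come from glusterd. */
    typedef enum {
            VOL_CREATED = 0,
            VOL_STARTED,
            VOL_STOPPED,
            VOL_STATUS_MAX
    } vol_status_t;

    static const char *vol_status_str[] = {
            "Created",
            "Started",
            "Stopped",
    };

    static const char *status_to_str (int status)
    {
            /* Guard against codes the table does not know about. */
            if (status < 0 || status >= VOL_STATUS_MAX)
                    return "unknown";
            return vol_status_str[status];
    }

    int main (void)
    {
            printf ("Status: %s\n", status_to_str (VOL_STARTED));
            printf ("Status: %s\n", status_to_str (42));
            return 0;
    }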
@@ -82,76 +92,51 @@ rpc_clnt_prog_t cli_pmap_prog = {
.progver = GLUSTER_PMAP_VERSION,
};
-
int
-gf_cli3_1_probe_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_probe_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_probe_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
//rsp.op_ret = -1;
//rsp.op_errno = EINVAL;
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- if (!rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_PROBE_SUCCESS:
- cli_out ("Probe successful");
- break;
- case GF_PROBE_LOCALHOST:
- cli_out ("Probe on localhost not needed");
- break;
- case GF_PROBE_FRIEND:
- cli_out ("Probe on host %s port %d already"
- " in peer list", rsp.hostname, rsp.port);
- break;
- default:
- cli_out ("Probe returned with unknown errno %d",
- rsp.op_errno);
- break;
- }
- }
- if (rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_PROBE_ANOTHER_CLUSTER:
- cli_out ("%s is already part of "
- "another cluster", rsp.hostname);
- break;
- case GF_PROBE_VOLUME_CONFLICT:
- cli_out ("Atleast one volume on %s conflicts "
- "with existing volumes in the "
- "cluster", rsp.hostname);
- break;
- case GF_PROBE_UNKNOWN_PEER:
- cli_out ("%s responded with 'unknown peer' error, "
- "this could happen if %s doesn't have"
- " localhost in its peer database",
- rsp.hostname, rsp.hostname);
- break;
- case GF_PROBE_ADD_FAILED:
- cli_out ("Failed to add peer information "
- "on %s" , rsp.hostname);
- break;
+ if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ if (rsp.op_ret)
+ gf_log ("cli", GF_LOG_ERROR, "%s", msg);
+ }
- default:
- cli_out ("Probe unsuccessful\nProbe returned "
- "with unknown errno %d", rsp.op_errno);
- break;
- }
- gf_log ("glusterd",GF_LOG_ERROR,"Probe failed with op_ret %d"
- " and op_errno %d", rsp.op_ret, rsp.op_errno);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str (NULL,
+ (rsp.op_ret)? NULL : msg,
+ rsp.op_ret, rsp.op_errno,
+ (rsp.op_ret)? msg : NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
}
+
+ if (!rsp.op_ret)
+ cli_out ("peer probe: success. %s", msg);
+ else
+ cli_err ("peer probe: failed: %s", msg);
+
ret = rsp.op_ret;
out:
@@ -160,51 +145,52 @@ out:
}
int
-gf_cli3_1_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_deprobe_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_deprobe_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
//rsp.op_ret = -1;
//rsp.op_errno = EINVAL;
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to deprobe");
+
if (rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_DEPROBE_LOCALHOST:
- cli_out ("%s is localhost",
- rsp.hostname);
- break;
- case GF_DEPROBE_NOT_FRIEND:
- cli_out ("%s is not part of cluster",
- rsp.hostname);
- break;
- case GF_DEPROBE_BRICK_EXIST:
- cli_out ("Brick(s) with the peer %s exist in "
- "cluster", rsp.hostname);
- break;
- default:
- cli_out ("Detach unsuccessful\nDetach returned "
- "with unknown errno %d",
- rsp.op_errno);
- break;
+ if (strlen (rsp.op_errstr) > 0) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ gf_log ("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
}
- gf_log ("glusterd",GF_LOG_ERROR,"Detach failed with op_ret %d"
- " and op_errno %d", rsp.op_ret, rsp.op_errno);
} else {
- cli_out ("Detach successful");
+ snprintf (msg, sizeof (msg), "success");
}
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str (NULL,
+ (rsp.op_ret)? NULL : msg,
+ rsp.op_ret, rsp.op_errno,
+ (rsp.op_ret)? msg : NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (!rsp.op_ret)
+ cli_out ("peer detach: %s", msg);
+ else
+ cli_err ("peer detach: failed: %s", msg);
ret = rsp.op_ret;
@@ -214,35 +200,143 @@ out:
}
int
-gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+gf_cli_output_peer_status (dict_t *dict, int count)
{
- gf1_cli_peer_list_rsp rsp = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ int ret = -1;
char *uuid_buf = NULL;
char *hostname_buf = NULL;
int32_t i = 1;
char key[256] = {0,};
char *state = NULL;
- int32_t port = 0;
int32_t connected = 0;
char *connected_str = NULL;
+ cli_out ("Number of Peers: %d", count);
+ i = 1;
+ while ( i <= count) {
+ snprintf (key, 256, "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+
+ snprintf (key, 256, "friend%d.state", i);
+ ret = dict_get_str (dict, key, &state);
+ if (ret)
+ goto out;
+
+ cli_out ("\nHostname: %s\nUuid: %s\nState: %s (%s)",
+ hostname_buf, uuid_buf, state, connected_str);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_cli_output_pool_list (dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t connected = 0;
+ char *connected_str = NULL;
+
+ if (count >= 1)
+ cli_out ("UUID\t\t\t\t\tHostname\tState");
+
+ while ( i <= count) {
+ snprintf (key, 256, "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+ cli_out ("%s\t%-9s\t%s ", uuid_buf, hostname_buf,
+ connected_str);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* function pointer for gf_cli_output_{pool_list,peer_status} */
+typedef int (*cli_friend_output_fn) (dict_t*, int);
+
+int
+gf_cli_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_peer_list_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {0,};
+ char *cmd = NULL;
+ cli_friend_output_fn friend_output_fn;
+ call_frame_t *frame = NULL;
+ unsigned long flags = 0;
+
+ frame = myframe;
+ flags = (long)frame->local;
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ cmd = "pool list";
+ friend_output_fn = &gf_cli_output_pool_list;
+ } else {
+ cmd = "peer status";
+ friend_output_fn = &gf_cli_output_peer_status;
+ }
+
+ /* 'free' the flags set by gf_cli_list_friends */
+ frame->local = NULL;
+
if (-1 == req->rpc_status) {
goto out;
}
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
//rsp.op_ret = -1;
//rsp.op_errno = EINVAL;
goto out;
}
-
- gf_log ("cli", GF_LOG_INFO, "Received resp to list: %d",
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to list: %d",
rsp.op_ret);
ret = rsp.op_ret;
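[Editor's note] gf_cli_list_friends_cbk now serves both "peer status" and "pool list" by stashing the command flag in frame->local and selecting the matching formatter through the cli_friend_output_fn function pointer. The standalone sketch below shows that dispatch pattern with two dummy formatters; the flag values and the int argument are invented for the example.

    #include <stdio.h>

    /* Both formatters share one signature, so either can be called
     * through the same pointer. */
    typedef int (*friend_output_fn) (int count);

    static int output_peer_status (int count)
    {
            printf ("Number of Peers: %d\n", count);
            return 0;
    }

    static int output_pool_list (int count)
    {
            printf ("UUID\t\t\t\t\tHostname\tState (%d entries)\n", count);
            return 0;
    }

    enum { LIST_PEERS = 0, LIST_POOL_NODES = 1 };   /* example flags */

    int main (void)
    {
            int flag = LIST_POOL_NODES;
            friend_output_fn fn = NULL;

            /* One callback, two output formats: choose once, call blindly. */
            fn = (flag == LIST_POOL_NODES) ? output_pool_list
                                           : output_peer_status;
            return fn (3);
    }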
@@ -250,7 +344,19 @@ gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
if (!rsp.op_ret) {
if (!rsp.friends.friends_len) {
- cli_out ("No peers present");
+ snprintf (msg, sizeof (msg),
+ "%s: No peers present", cmd);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ cli_err ("%s", msg);
ret = 0;
goto out;
}
@@ -272,58 +378,34 @@ gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- ret = dict_get_int32 (dict, "count", &count);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict, rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ ret = dict_get_int32 (dict, "count", &count);
if (ret) {
goto out;
}
- cli_out ("Number of Peers: %d", count);
-
- while ( i <= count) {
- snprintf (key, 256, "friend%d.uuid", i);
- ret = dict_get_str (dict, key, &uuid_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.connected", i);
- ret = dict_get_int32 (dict, key, &connected);
- if (ret)
- goto out;
- if (connected)
- connected_str = "Connected";
- else
- connected_str = "Disconnected";
-
- snprintf (key, 256, "friend%d.port", i);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.state", i);
- ret = dict_get_str (dict, key, &state);
- if (ret)
- goto out;
-
- if (!port) {
- cli_out ("\nHostname: %s\nUuid: %s\nState: %s "
- "(%s)",
- hostname_buf, uuid_buf, state,
- connected_str);
- } else {
- cli_out ("\nHostname: %s\nPort: %d\nUuid: %s\n"
- "State: %s (%s)", hostname_buf, port,
- uuid_buf, state, connected_str);
- }
- i++;
+ ret = friend_output_fn (dict, count);
+ if (ret) {
+ goto out;
}
} else {
- ret = -1;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict, rsp.op_ret,
+ rsp.op_errno, NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ } else {
+ ret = -1;
+ }
goto out;
}
@@ -333,7 +415,7 @@ gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
out:
cli_cmd_broadcast_response (ret);
if (ret)
- cli_out ("Peer status unsuccessful");
+ cli_err ("%s: failed", cmd);
if (dict)
dict_destroy (dict);
@@ -367,596 +449,1067 @@ cli_out_options ( char *substr, char *optstr, char *valstr)
cli_out ("%s: %s",ptr2 , valstr);
}
+static int
+_gf_cli_output_volinfo_opts (dict_t *d, char *k,
+ data_t *v, void *tmp)
+{
+ int ret = 0;
+ char *key = NULL;
+ char *ptr = NULL;
+ data_t *value = NULL;
+
+ key = tmp;
+
+ ptr = strstr (k, "option.");
+ if (ptr) {
+ value = v;
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ cli_out_options (key, k, v->data);
+ }
+out:
+ return ret;
+}
+
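[Editor's note] _gf_cli_output_volinfo_opts() above replaces the old hand-rolled walk over dict->members_list: it is intended to be passed to dict_foreach(), which invokes it once per dictionary entry, and it prints only entries whose key contains "option.". The standalone sketch below imitates that shape with a plain key/value array and the same strstr() filter; the entry type and the iteration helper here are stand-ins for illustration, not the libglusterfs dict API.

    #include <stdio.h>
    #include <string.h>

    struct kv { const char *key; const char *value; };

    /* Callback with the same shape as the real per-entry handler:
     * ignore keys that do not contain "option.". */
    static int print_if_option (const struct kv *entry, void *data)
    {
            const char *ptr = strstr (entry->key, "option.");

            (void) data;                            /* unused in this sketch */
            if (ptr == NULL)
                    return 0;                       /* not a volume option */

            /* print the option name (after "option.") and its value */
            printf ("%s: %s\n", ptr + strlen ("option."), entry->value);
            return 0;
    }

    /* Minimal stand-in for dict_foreach(): call fn for every entry. */
    static int foreach_entry (const struct kv *entries, int n,
                              int (*fn) (const struct kv *, void *),
                              void *data)
    {
            int i;

            for (i = 0; i < n; i++)
                    if (fn (&entries[i], data))
                            return -1;
            return 0;
    }

    int main (void)
    {
            struct kv vol[] = {
                    { "volume0.name",                         "testvol" },
                    { "volume0.option.performance.io-cache",  "off"     },
                    { "volume0.option.auth.allow",            "*"       },
            };

            printf ("Options Reconfigured:\n");
            return foreach_entry (vol, 3, print_if_option, NULL);
    }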
int
-gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- gf1_cli_get_vol_rsp rsp = {0,};
- int ret = 0;
- dict_t *dict = NULL;
- char *volname = NULL;
- int32_t i = 0;
- char key[1024] = {0,};
- int32_t status = 0;
- int32_t type = 0;
- int32_t brick_count = 0;
- int32_t dist_count = 0;
- int32_t stripe_count = 0;
- int32_t replica_count = 0;
- int32_t vol_type = 0;
- char *brick = NULL;
- int32_t j = 1;
- cli_local_t *local = NULL;
- int32_t transport = 0;
- data_pair_t *pairs = NULL;
- char *ptr = NULL;
- data_t *value = NULL;
- int opt_count = 0;
- int k = 0;
- char err_str[2048] = {0};
-
- snprintf (err_str, sizeof (err_str), "Volume info unsuccessful");
- if (-1 == req->rpc_status) {
+ int ret = -1;
+ int opt_count = 0;
+ int32_t i = 0;
+ int32_t j = 1;
+ int32_t status = 0;
+ int32_t type = 0;
+ int32_t brick_count = 0;
+ int32_t dist_count = 0;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t vol_type = 0;
+ int32_t transport = 0;
+ char *volume_id_str = NULL;
+ char *brick = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ char key[1024] = {0};
+ char err_str[2048] = {0};
+ gf_cli_rsp rsp = {0};
+ char *caps = NULL;
+ int k __attribute__((unused)) = 0;
+ // snap_volume indicates whether the volume is a normal volume
+ // or a volume created for a snapshot
+ int32_t snap_volume = 0;
+
+ if (-1 == req->rpc_status)
goto out;
- }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_get_vol_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
-
gf_log ("cli", GF_LOG_INFO, "Received resp to get vol: %d",
rsp.op_ret);
- if (!rsp.op_ret) {
+ if (rsp.op_ret) {
+ ret = -1;
+ goto out;
+ }
- if (!rsp.volumes.volumes_len) {
- cli_out ("No volumes present");
- ret = 0;
- goto out;
- }
+ if (!rsp.dict.dict_len) {
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ cli_err ("No volumes present");
+ ret = 0;
+ goto out;
+ }
- dict = dict_new ();
+ dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_unserialize (rsp.volumes.volumes_val,
- rsp.volumes.volumes_len,
- &dict);
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- }
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ }
- ret = dict_get_int32 (dict, "count", &count);
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
- if (ret) {
- goto out;
- }
+ local = ((call_frame_t *)myframe)->local;
- local = ((call_frame_t *)myframe)->local;
- //cli_out ("Number of Volumes: %d", count);
+ if (!count) {
+ switch (local->get_vol.flags) {
- if (!count && (local->u.get_vol.flags ==
- GF_CLI_GET_NEXT_VOLUME)) {
- local->u.get_vol.volname = NULL;
+ case GF_CLI_GET_NEXT_VOLUME:
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = NULL;
ret = 0;
goto out;
- } else if (!count && (local->u.get_vol.flags ==
- GF_CLI_GET_VOLUME)) {
+
+ case GF_CLI_GET_VOLUME:
+ memset (err_str, 0, sizeof (err_str));
snprintf (err_str, sizeof (err_str),
"Volume %s does not exist",
- local->u.get_vol.volname);
+ local->get_vol.volname);
ret = -1;
- goto out;
+ if (!(global_state->mode & GLUSTER_MODE_XML))
+ goto out;
}
+ }
- while ( i < count) {
- cli_out (" ");
- snprintf (key, 256, "volume%d.name", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ /* For GET_NEXT_VOLUME, output is already begun in,
+ * and will also end in, gf_cli_get_next_volume()
+ */
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_begin
+ (local, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
goto out;
+ }
+ }
- snprintf (key, 256, "volume%d.type", i);
- ret = dict_get_int32 (dict, key, &type);
- if (ret)
+ if (dict) {
+ ret = cli_xml_output_vol_info (local, dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
goto out;
+ }
+ }
- snprintf (key, 256, "volume%d.status", i);
- ret = dict_get_int32 (dict, key, &status);
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_end (local);
if (ret)
- goto out;
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
+ goto out;
+ }
- snprintf (key, 256, "volume%d.brick_count", i);
- ret = dict_get_int32 (dict, key, &brick_count);
- if (ret)
- goto out;
+ while ( i < count) {
+ cli_out (" ");
+ snprintf (key, 256, "volume%d.name", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.dist_count", i);
- ret = dict_get_int32 (dict, key, &dist_count);
- if (ret)
- goto out;
+ snprintf (key, 256, "volume%d.type", i);
+ ret = dict_get_int32 (dict, key, &type);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.stripe_count", i);
- ret = dict_get_int32 (dict, key, &stripe_count);
- if (ret)
- goto out;
+ snprintf (key, 256, "volume%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.replica_count", i);
- ret = dict_get_int32 (dict, key, &replica_count);
- if (ret)
- goto out;
+ snprintf (key, sizeof (key), "volume%d.snap_volume", i);
+ ret = dict_get_int32 (dict, key, &snap_volume);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.transport", i);
- ret = dict_get_int32 (dict, key, &transport);
- if (ret)
- goto out;
+ snprintf (key, 256, "volume%d.brick_count", i);
+ ret = dict_get_int32 (dict, key, &brick_count);
+ if (ret)
+ goto out;
- vol_type = type;
-
- // Distributed (stripe/replicate/raid01) setups
- if ((type > 0) && ( dist_count < brick_count))
- vol_type = type + 3;
-
- cli_out ("Volume Name: %s", volname);
- cli_out ("Type: %s", cli_volume_type[vol_type]);
- cli_out ("Status: %s", cli_volume_status[status]);
-
- if (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)
- cli_out ("Number of Bricks: %d x %d x %d = %d",
- (brick_count / dist_count),
- stripe_count,
- replica_count,
- brick_count);
- else if (type == GF_CLUSTER_TYPE_NONE)
- cli_out ("Number of Bricks: %d",
- brick_count);
- else
- /* For both replicate and stripe, dist_count is
- good enough */
- cli_out ("Number of Bricks: %d x %d = %d",
- (brick_count / dist_count),
- dist_count,
- brick_count);
+ snprintf (key, 256, "volume%d.dist_count", i);
+ ret = dict_get_int32 (dict, key, &dist_count);
+ if (ret)
+ goto out;
+ snprintf (key, 256, "volume%d.stripe_count", i);
+ ret = dict_get_int32 (dict, key, &stripe_count);
+ if (ret)
+ goto out;
- cli_out ("Transport-type: %s",
- ((transport == 0)?"tcp":
- (transport == 1)?"rdma":
- "tcp,rdma"));
- j = 1;
+ snprintf (key, 256, "volume%d.replica_count", i);
+ ret = dict_get_int32 (dict, key, &replica_count);
+ if (ret)
+ goto out;
+ snprintf (key, 256, "volume%d.transport", i);
+ ret = dict_get_int32 (dict, key, &transport);
+ if (ret)
+ goto out;
- GF_FREE (local->u.get_vol.volname);
- local->u.get_vol.volname = gf_strdup (volname);
+ snprintf (key, 256, "volume%d.volume_id", i);
+ ret = dict_get_str (dict, key, &volume_id_str);
+ if (ret)
+ goto out;
- if (brick_count)
- cli_out ("Bricks:");
+ vol_type = type;
- while ( j <= brick_count) {
- snprintf (key, 1024, "volume%d.brick%d",
- i, j);
- ret = dict_get_str (dict, key, &brick);
+ // Distributed (stripe/replicate/stripe-replica) setups
+ if ((type > 0) && ( dist_count < brick_count))
+ vol_type = type + 3;
+
+ cli_out ("Volume Name: %s", volname);
+ cli_out ("Type: %s", cli_vol_type_str[vol_type]);
+ cli_out ("Volume ID: %s", volume_id_str);
+ cli_out ("Status: %s", cli_vol_status_str[status]);
+ if (snap_volume)
+ cli_out ("Snap Volume: %s", "yes");
+ else
+ cli_out ("Snap Volume: %s", "no");
+
+#ifdef HAVE_BD_XLATOR
+ k = 0;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.xlator%d", i, k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ goto next;
+ do {
+ j = 0;
+ cli_out ("Xlator %d: %s", k + 1, caps);
+ do {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d.caps%d",
+ i, k, j++);
+ ret = dict_get_str (dict, key, &caps);
if (ret)
- goto out;
- cli_out ("Brick%d: %s", j, brick);
- j++;
- }
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
- }
+ break;
+ cli_out ("Capability %d: %s", j, caps);
+ } while (1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d", i, ++k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ } while (1);
+
+next:
+#else
+ caps = 0; /* Avoid compiler warnings when BD not enabled */
+#endif
+
+ if (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
+ cli_out ("Number of Bricks: %d x %d x %d = %d",
+ (brick_count / dist_count),
+ stripe_count,
+ replica_count,
+ brick_count);
+ } else if (type == GF_CLUSTER_TYPE_NONE) {
+ cli_out ("Number of Bricks: %d", brick_count);
+ } else {
+ /* For both replicate and stripe, dist_count is
+ good enough */
+ cli_out ("Number of Bricks: %d x %d = %d",
+ (brick_count / dist_count),
+ dist_count, brick_count);
+ }
+
+ cli_out ("Transport-type: %s",
+ ((transport == 0)?"tcp":
+ (transport == 1)?"rdma":
+ "tcp,rdma"));
+ j = 1;
- snprintf (key, 256, "volume%d.opt_count",i);
- ret = dict_get_int32 (dict, key, &opt_count);
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = gf_strdup (volname);
+
+ if (brick_count)
+ cli_out ("Bricks:");
+
+ while (j <= brick_count) {
+ snprintf (key, 1024, "volume%d.brick%d", i, j);
+ ret = dict_get_str (dict, key, &brick);
if (ret)
- goto out;
-
- if (!opt_count)
- goto out;
-
- cli_out ("Options Reconfigured:");
- k = 0;
- while ( k < opt_count) {
-
- snprintf (key, 256, "volume%d.option.",i);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- cli_out_options (key, pairs->key,
- value->data);
- }
- pairs = pairs->next;
- }
- k++;
- }
+ goto out;
- i++;
+ cli_out ("Brick%d: %s", j, brick);
+#ifdef HAVE_BD_XLATOR
+ snprintf (key, 256, "volume%d.vg%d", i, j);
+ ret = dict_get_str (dict, key, &caps);
+ if (!ret)
+ cli_out ("Brick%d VG: %s", j, caps);
+#endif
+ j++;
}
+ snprintf (key, 256, "volume%d.opt_count",i);
+ ret = dict_get_int32 (dict, key, &opt_count);
+ if (ret)
+ goto out;
- } else {
- ret = -1;
- goto out;
+ if (!opt_count)
+ goto out;
+
+ cli_out ("Options Reconfigured:");
+
+ snprintf (key, 256, "volume%d.option.",i);
+
+ ret = dict_foreach (dict, _gf_cli_output_volinfo_opts, key);
+ if (ret)
+ goto out;
+
+ i++;
}
ret = 0;
-
out:
cli_cmd_broadcast_response (ret);
if (ret)
- cli_out ("%s", err_str);
+ cli_err ("%s", err_str);
if (dict)
dict_destroy (dict);
- gf_log ("", GF_LOG_INFO, "Returning: %d", ret);
+ free (rsp.dict.dict_val);
+
+ free (rsp.op_errstr);
+
+ gf_log ("cli", GF_LOG_INFO, "Returning: %d", ret);
return ret;
}
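[Editor's note] The "Number of Bricks" line printed above encodes the volume layout: for a striped-replicated volume it is (bricks / dist_count) x stripe x replica = bricks, for a plain distribute volume just the brick count, and otherwise (bricks / dist_count) x dist_count = bricks, where dist_count is the number of bricks per distribute subvolume. A standalone sketch of that formatting decision, with made-up counts, follows.

    #include <stdio.h>

    enum { TYPE_NONE = 0, TYPE_STRIPE, TYPE_REPLICATE, TYPE_STRIPE_REPLICATE };

    /* Reproduce the three output shapes used for the brick-count line. */
    static void print_brick_line (int type, int bricks, int dist,
                                  int stripe, int replica)
    {
            if (type == TYPE_STRIPE_REPLICATE)
                    printf ("Number of Bricks: %d x %d x %d = %d\n",
                            bricks / dist, stripe, replica, bricks);
            else if (type == TYPE_NONE)
                    printf ("Number of Bricks: %d\n", bricks);
            else
                    /* for both replicate and stripe, dist is enough */
                    printf ("Number of Bricks: %d x %d = %d\n",
                            bricks / dist, dist, bricks);
    }

    int main (void)
    {
            /* 12 bricks, stripe 2, replica 3: each distribute subvolume has
             * 6 bricks, so 12/6 = 2 subvolumes -> "2 x 2 x 3 = 12". */
            print_brick_line (TYPE_STRIPE_REPLICATE, 12, 6, 2, 3);
            print_brick_line (TYPE_NONE, 4, 4, 1, 1);
            print_brick_line (TYPE_REPLICATE, 6, 3, 1, 3);
            return 0;
    }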
int
-gf_cli3_1_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_create_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
cli_local_t *local = NULL;
char *volname = NULL;
dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
local = ((call_frame_t *) (myframe))->local;
- ((call_frame_t *) (myframe))->local = NULL;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_create_vol_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- dict = local->u.create_vol.dict;
+ gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
+
+ dict = local->dict;
ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
- gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_vol_create (rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume create: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume create: %s: failed", volname);
else
- cli_out ("Creation of volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful":
- "successful. Please start the volume to "
- "access data.");
+ cli_out ("volume create: %s: success: "
+ "please start the volume to access data", volname);
+
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (dict)
- dict_unref (dict);
- if (local)
- cli_local_wipe (local);
- if (rsp.volname)
- free (rsp.volname);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
int
-gf_cli3_1_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_delete_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
cli_local_t *local = NULL;
char *volname = NULL;
call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_delete_vol_rsp);
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (local)
+ dict = local->dict;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "dict get failed");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to delete volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volDelete", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
goto out;
}
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume delete: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume delete: %s: failed", volname);
+ else
+ cli_out ("volume delete: %s: success", volname);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+
+ gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli3_1_uuid_get_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ char *uuid_str = NULL;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status)
+ goto out;
+
frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
local = frame->local;
frame->local = NULL;
- if (local)
- volname = local->u.delete_vol.volname;
+ gf_log ("cli", GF_LOG_INFO, "Received resp to uuid get");
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to delete volume");
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len,
+ &dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to unserialize "
+ "response for uuid get");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "uuid", &uuid_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get uuid "
+ "from dictionary");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict ("uuidGenerate", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, "") == 0)
+ cli_err ("Get uuid was unsuccessful");
+ else
+ cli_err ("%s", rsp.op_errstr);
+
+ } else {
+ cli_out ("UUID: %s", uuid_str);
+
+ }
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ cli_local_wipe (local);
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref (dict);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli3_1_uuid_reset_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+ frame->local = NULL;
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to uuid reset");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict ("uuidReset", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ cli_err ("%s", rsp.op_errstr);
else
- cli_out ("Deleting volume %s has been %s", volname,
+ cli_out ("resetting the peer uuid has been %s",
(rsp.op_ret) ? "unsuccessful": "successful");
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
cli_local_wipe (local);
- if (rsp.volname)
- free (rsp.volname);
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref (dict);
+
gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
return ret;
}
int
-gf_cli3_1_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_start_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
cli_local_t *local = NULL;
char *volname = NULL;
call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_start_vol_rsp);
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
- if (frame) {
+ if (frame)
local = frame->local;
- frame->local = NULL;
- }
if (local)
- volname = local->u.start_vol.volname;
+ dict = local->dict;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "dict get failed");
+ goto out;
+ }
gf_log ("cli", GF_LOG_INFO, "Received resp to start volume");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volStart", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ cli_err ("volume start: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume start: %s: failed", volname);
else
- cli_out ("Starting volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ cli_out ("volume start: %s: success", volname);
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.volname)
- free (rsp.volname);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
int
-gf_cli3_1_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_stop_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
cli_local_t *local = NULL;
char *volname = NULL;
call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_stop_vol_rsp);
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
if (frame)
local = frame->local;
- if (local)
- volname = local->u.start_vol.volname;
+ if (local) {
+ dict = local->dict;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Unable to get volname from dict");
+ goto out;
+ }
+ }
gf_log ("cli", GF_LOG_INFO, "Received resp to stop volume");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volStop", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ cli_err ("volume stop: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume stop: %s: failed", volname);
else
- cli_out ("Stopping volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ cli_out ("volume stop: %s: success", volname);
+
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (rsp.volname)
- free (rsp.volname);
+ free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+
return ret;
}
int
-gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf2_cli_defrag_vol_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
cli_local_t *local = NULL;
char *volname = NULL;
call_frame_t *frame = NULL;
char *status = "unknown";
int cmd = 0;
- int ret = 0;
+ int ret = -1;
+ dict_t *dict = NULL;
+ dict_t *local_dict = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ char msg[1024] = {0,};
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ int32_t counter = 0;
+ char *node_name = NULL;
+ char key[256] = {0,};
+ int32_t i = 1;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *size_str = NULL;
+ char *task_id_str = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp);
+ (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
if (frame)
local = frame->local;
- if (local) {
- volname = local->u.defrag_vol.volname;
- cmd = local->u.defrag_vol.cmd;
+ if (local)
+ local_dict = local->dict;
+
+ ret = dict_get_str (local_dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to get volname");
+ goto out;
+ }
+
+ ret = dict_get_int32 (local_dict, "rebalance-command", (int32_t*)&cmd);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ /* All other possibilities are about starting a rebalance */
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ } else {
+ if (!rsp.op_ret) {
+ snprintf (msg, sizeof (msg),
+ "Starting rebalance on volume %s has "
+ "been successful.\nID: %s", volname,
+ task_id_str);
+ } else {
+ snprintf (msg, sizeof (msg),
+ "Starting rebalance on volume %s has "
+ "been unsuccessful.", volname);
+ }
+ }
+ goto done;
}
if (cmd == GF_DEFRAG_CMD_STOP) {
if (rsp.op_ret == -1) {
if (strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg),
+ "%s", rsp.op_errstr);
else
- cli_out ("rebalance volume %s stop failed",
- volname);
+ snprintf (msg, sizeof (msg),
+ "rebalance volume %s stop failed",
+ volname);
+ goto done;
} else {
- cli_out ("stopped rebalance process of volume %s \n"
- "(after rebalancing %"PRId64" files totaling "
- "%"PRId64" bytes)", volname, rsp.files, rsp.size);
+ snprintf (msg, sizeof (msg),
+ "rebalance process may be in the middle of a "
+ "file migration.\nThe process will be fully "
+ "stopped once the migration of the file is "
+ "complete.\nPlease check rebalance process "
+ "for completion before doing any further "
+ "brick related tasks on the volume.");
}
- goto done;
}
if (cmd == GF_DEFRAG_CMD_STATUS) {
if (rsp.op_ret == -1) {
if (strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg),
+ "%s", rsp.op_errstr);
else
- cli_out ("failed to get the status of "
- "rebalance process");
+ snprintf (msg, sizeof (msg),
+ "Failed to get the status of "
+ "rebalance process");
goto done;
}
+ }
- switch (rsp.op_errno) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:
- status = "step 1: layout fix in progress";
- break;
- case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:
- status = "step 2: data migration in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- status = "step 1: layout fix complete";
- break;
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
- status = "step 2: data migration complete";
- break;
- case GF_DEFRAG_STATUS_PAUSED:
- status = "paused";
- break;
- }
- if (rsp.files && (rsp.op_errno == 1)) {
- cli_out ("rebalance %s: fixed layout %"PRId64,
- status, rsp.files);
- goto done;
- }
- if (rsp.files && (rsp.op_errno == 6)) {
- cli_out ("rebalance %s: fixed layout %"PRId64,
- status, rsp.files);
- goto done;
- }
- if (rsp.files) {
- cli_out ("rebalance %s: rebalanced %"PRId64
- " files of size %"PRId64" (total files"
- " scanned %"PRId64")", status,
- rsp.files, rsp.size, rsp.lookedup_files);
- goto done;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_rebalance (cmd, dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ goto out;
+ }
- cli_out ("rebalance %s", status);
- goto done;
+ ret = dict_get_int32 (dict, "count", &counter);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "count not set");
+ goto out;
}
- /* All other possibility is about starting a volume */
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- else
- cli_out ("starting rebalance on volume %s has been %s",
- volname, (rsp.op_ret) ? "unsuccessful":
- "successful");
+ cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------", "------------", "--------------");
+ do {
+ snprintf (key, 256, "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get node-name");
-done:
- if (volname)
- GF_FREE (volname);
+ memset (key, 0, 256);
+ snprintf (key, 256, "files-%d", i);
+ ret = dict_get_uint64 (dict, key, &files);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "size-%d", i);
+ ret = dict_get_uint64 (dict, key, &size);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get size of xfer");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "lookups-%d", i);
+ ret = dict_get_uint64 (dict, key, &lookup);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get lookedup file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", i);
+ ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get status");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &failures);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get failures count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get skipped count");
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", i);
+ ret = dict_get_double (dict, key, &elapsed);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get run-time");
+
+ status = cli_vol_task_status_str[status_rcd];
+ size_str = gf_uint64_2human_readable(size);
+ cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"PRIu64
+ " %13"PRIu64 " %20s %18.2f", node_name, files,
+ size_str, lookup, failures, skipped, status, elapsed);
+ GF_FREE(size_str);
+ i++;
+ } while (i <= counter);
+
+
+done:
+ if (global_state->mode & GLUSTER_MODE_XML)
+ cli_xml_output_str ("volRebalance", msg,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ else {
+ if (rsp.op_ret)
+ cli_err ("volume rebalance: %s: failed: %s", volname,
+ msg);
+ else
+ cli_out ("volume rebalance: %s: success: %s", volname,
+ msg);
+ }
ret = rsp.op_ret;
out:
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- if (rsp.volname)
- free (rsp.volname); //malloced by xdr
+ free (rsp.op_errstr); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
+ if (dict)
+ dict_unref (dict);
cli_cmd_broadcast_response (ret);
return ret;
}
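[Editor's note] The status branch of the rebalance callback now renders one row per node from the unserialized response dictionary, using fixed printf field widths so the columns line up, plus a human-readable size string. The sketch below shows only the table-formatting part, with hard-coded sample values and a simple local byte formatter standing in for gf_uint64_2human_readable(), which is assumed to be provided elsewhere in libglusterfs.

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    /* Local stand-in for a bytes-to-human-readable helper. */
    static void size_to_str (uint64_t bytes, char *buf, size_t len)
    {
            const char *unit[] = { "Bytes", "KB", "MB", "GB", "TB" };
            int i = 0;
            double val = (double) bytes;

            while (val >= 1024.0 && i < 4) {
                    val /= 1024.0;
                    i++;
            }
            snprintf (buf, len, "%.1f%s", val, unit[i]);
    }

    int main (void)
    {
            char size_buf[32] = {0};

            /* Header and separator mirror the fixed-width row format. */
            printf ("%40s %16s %13s %13s %20s %18s\n", "Node",
                    "Rebalanced-files", "size", "scanned",
                    "status", "run time in secs");
            printf ("%40s %16s %13s %13s %20s %18s\n", "---------",
                    "-----------", "-----------", "-----------",
                    "------------", "--------------");

            size_to_str ((uint64_t) 734003200, size_buf, sizeof (size_buf));

            /* One sample row; the real values come from the response dict. */
            printf ("%40s %16"PRIu64" %13s %13"PRIu64" %20s %18.2f\n",
                    "localhost", (uint64_t) 120, size_buf,
                    (uint64_t) 4096, "completed", 12.50);
            return 0;
    }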
int
-gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_rename_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_rename_vol_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- cli_out ("Rename volume %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
+ snprintf (msg, sizeof (msg), "Rename volume %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volRename", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume rename: failed");
+ else
+ cli_out ("volume rename: success");
ret = rsp.op_ret;
@@ -966,61 +1519,125 @@ out:
}
int
-gf_cli3_1_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_reset_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_reset_vol_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to reset");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
- cli_out ("reset volume %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
+ snprintf (msg, sizeof (msg), "reset volume %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volReset", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume reset: failed: %s", msg);
+ else
+ cli_out ("volume reset: success: %s", msg);
ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
+ cli_cmd_broadcast_response (ret);
return ret;
}
+char *
+is_server_debug_xlator (void *myframe)
+{
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char **words = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *debug_xlator = NULL;
+
+ frame = myframe;
+ local = frame->local;
+ words = (char **)local->words;
+
+ while (*words != NULL) {
+ if (strstr (*words, "trace") == NULL &&
+ strstr (*words, "error-gen") == NULL) {
+ words++;
+ continue;
+ }
+
+ key = *words;
+ words++;
+ value = *words;
+ if (value == NULL)
+ break;
+ if (strstr (value, "client")) {
+ words++;
+ continue;
+ } else {
+ if (!(strstr (value, "posix") || strstr (value, "acl")
+ || strstr (value, "locks") ||
+ strstr (value, "io-threads") ||
+ strstr (value, "marker") ||
+ strstr (value, "index"))) {
+ words++;
+ continue;
+ } else {
+ debug_xlator = gf_strdup (key);
+ break;
+ }
+ }
+ }
+
+ return debug_xlator;
+}
+
int
-gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_set_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
dict_t *dict = NULL;
char *help_str = NULL;
+ char msg[1024] = {0,};
+ char *debug_xlator = NULL;
+ char tmp_str[512] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_set_vol_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to set");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
-
dict = dict_new ();
if (!dict) {
@@ -1030,36 +1647,73 @@ gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (ret)
+ /* For brick processes a graph change does not happen on the fly.
+ * The process has to be restarted. So this is a check from the
+ * volume set option such that if debug xlators such as trace/errorgen
+ * are provided in the set command, warn the user.
+ */
+ debug_xlator = is_server_debug_xlator (myframe);
+
+ if (dict_get_str (dict, "help-str", &help_str) && !msg[0])
+ snprintf (msg, sizeof (msg), "Set volume %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ if (rsp.op_ret == 0 && debug_xlator) {
+ snprintf (tmp_str, sizeof (tmp_str), "\n%s translator has been "
+ "added to the server volume file. Please restart the"
+ " volume for enabling the translator", debug_xlator);
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
+ ret = cli_xml_output_str ("volSet", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
goto out;
+ }
- if (dict_get_str (dict, "help-str", &help_str))
- cli_out ("Set volume %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
- else
- cli_out ("%s", help_str);
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ cli_err ("volume set: failed: %s", rsp.op_errstr);
+ else
+ cli_err ("volume set: failed");
+ } else {
+ if (help_str == NULL) {
+ if (debug_xlator == NULL)
+ cli_out ("volume set: success");
+ else
+ cli_out ("volume set: success%s", tmp_str);
+ }else {
+ cli_out ("%s", help_str);
+ }
+ }
ret = rsp.op_ret;
out:
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (debug_xlator);
cli_cmd_broadcast_response (ret);
return ret;
}
int
-gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_add_brick_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_add_brick_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1067,18 +1721,30 @@ gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
gf_log ("cli", GF_LOG_INFO, "Received resp to add brick");
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
- cli_out ("Add Brick %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
+ snprintf (msg, sizeof (msg), "Add Brick %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volAddBrick", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume add-brick: failed: %s", rsp.op_errstr);
+ else
+ cli_out ("volume add-brick: success");
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (rsp.volname)
- free (rsp.volname);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
@@ -1086,128 +1752,348 @@ int
gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf2_cli_defrag_vol_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
char *status = "unknown";
- int ret = 0;
+ int ret = -1;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ dict_t *dict = NULL;
+ char msg[1024] = {0,};
+ char key[256] = {0,};
+ int32_t i = 1;
+ int32_t counter = 0;
+ char *node_name = 0;
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *size_str = NULL;
+ int32_t command = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *cmd_str = "unknown";
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp);
+ (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- ret = rsp.op_ret;
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- else
- cli_out ("failed to get the status of "
- "remove-brick process");
+ if (frame)
+ local = frame->local;
+ ret = dict_get_int32 (local->dict, "command", &command);
+ if (ret)
goto out;
- }
+ cmd = command;
- switch (rsp.op_errno) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- status = "in progress";
+ switch (cmd) {
+ case GF_OP_CMD_STOP:
+ cmd_str = "stop";
break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
+ case GF_OP_CMD_STATUS:
+ cmd_str = "status";
break;
- case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- case GF_DEFRAG_STATUS_PAUSED:
- status = "paused";
+ default:
break;
}
- if (rsp.files && (rsp.op_errno == 1)) {
- cli_out ("remove-brick %s: fixed layout %"PRId64,
- status, rsp.files);
+ ret = rsp.op_ret;
+ if (rsp.op_ret == -1) {
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "volume remove-brick %s: "
+ "failed: %s", cmd_str, rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "volume remove-brick %s: "
+ "failed", cmd_str);
+
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ cli_err ("%s", msg);
goto out;
}
- if (rsp.files && (rsp.op_errno == 6)) {
- cli_out ("remove-brick %s: fixed layout %"PRId64,
- status, rsp.files);
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ strncpy (msg, "failed to unserialize req-buffer to "
+ "dictionary", sizeof (msg));
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ rsp.op_ret = -1;
+ goto xml_output;
+ }
+
+ gf_log ("cli", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ }
+
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (strcmp (rsp.op_errstr, "")) {
+ ret = cli_xml_output_vol_remove_brick (_gf_true, dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ } else {
+ ret = cli_xml_output_vol_remove_brick (_gf_true, dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ msg);
+ }
goto out;
}
- if (rsp.files) {
- cli_out ("remove-brick %s: decommissioned %"PRId64
- " files of size %"PRId64, status,
- rsp.files, rsp.size);
+
+ ret = dict_get_int32 (dict, "count", &counter);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "count not set");
goto out;
}
- cli_out ("remove-brick %s", status);
+
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run-time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %16s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------","------------", "--------------");
+
+ do {
+ snprintf (key, 256, "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get node-name");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "files-%d", i);
+ ret = dict_get_uint64 (dict, key, &files);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "size-%d", i);
+ ret = dict_get_uint64 (dict, key, &size);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get size of xfer");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "lookups-%d", i);
+ ret = dict_get_uint64 (dict, key, &lookup);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get lookedup file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", i);
+ ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get status");
+
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &failures);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get failure on files");
+
+                memset (key, 0, 256);
+                snprintf (key, 256, "skipped-%d", i);
+                ret = dict_get_uint64 (dict, key, &skipped);
+                if (ret)
+                        gf_log (frame->this->name, GF_LOG_TRACE,
+                                "Failed to get skipped files");
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", i);
+ ret = dict_get_double (dict, key, &elapsed);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get run-time");
+
+ switch (status_rcd) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ size_str = gf_uint64_2human_readable(size);
+
+ if (strcmp (status, "not started")) {
+ cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
+ PRIu64 " %13"PRIu64 " %14s %16.2f", node_name,
+ files, size_str, lookup, failures, skipped,
+ status, elapsed);
+ }
+ GF_FREE(size_str);
+
+ i++;
+ } while (i <= counter);
+
+ if ((cmd == GF_OP_CMD_STOP) && (rsp.op_ret == 0)) {
+ cli_out ("'remove-brick' process may be in the middle of a "
+ "file migration.\nThe process will be fully stopped "
+ "once the migration of the file is complete.\nPlease "
+ "check remove-brick process for completion before "
+ "doing any further brick related tasks on the "
+ "volume.");
+ }
out:
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- if (rsp.volname)
- free (rsp.volname); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
+ if (dict)
+ dict_unref (dict);
cli_cmd_broadcast_response (ret);
return ret;
}
int
-gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_remove_brick_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *cmd_str = "unknown";
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *task_id_str = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_remove_brick_rsp);
+ frame = myframe;
+ local = frame->local;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
+ ret = dict_get_int32 (local->dict, "command", (int32_t *)&cmd);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to unserialize rsp_dict");
+ goto out;
+ }
+ }
+
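+        /* Map the remove-brick sub-command to a printable string; for
+         * "start", also pick up the task-id glusterd returned in rsp_dict. */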
+ switch (cmd) {
+ case GF_OP_CMD_START:
+ cmd_str = "start";
+
+ ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "remove-brick-id is not present in dict");
+ }
+ break;
+ case GF_OP_CMD_COMMIT:
+ cmd_str = "commit";
+ break;
+ case GF_OP_CMD_COMMIT_FORCE:
+ cmd_str = "commit force";
+ break;
+ default:
+ cmd_str = "unknown";
+ break;
+ }
+
gf_log ("cli", GF_LOG_INFO, "Received resp to remove brick");
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
- cli_out ("Remove Brick %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
+ snprintf (msg, sizeof (msg), "Remove Brick %s %s", cmd_str,
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_remove_brick (_gf_false, rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ cli_err ("volume remove-brick %s: failed: %s", cmd_str,
+ msg);
+ } else {
+ cli_out ("volume remove-brick %s: success", cmd_str);
+ if (GF_OP_CMD_START == cmd && task_id_str != NULL)
+ cli_out ("ID: %s", task_id_str);
+ }
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (rsp.volname)
- free (rsp.volname);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+
return ret;
}
int
-gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_replace_brick_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
cli_local_t *local = NULL;
call_frame_t *frame = NULL;
dict_t *dict = NULL;
@@ -1216,6 +2102,9 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
char *status_reply = NULL;
gf1_cli_replace_op replace_op = 0;
char *rb_operation_str = NULL;
+ dict_t *rsp_dict = NULL;
+ char msg[1024] = {0,};
+ char *task_id_str = NULL;
if (-1 == req->rpc_status) {
goto out;
@@ -1223,15 +2112,16 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
frame = (call_frame_t *) myframe;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_replace_brick_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
local = frame->local;
GF_ASSERT (local);
- dict = local->u.replace_brick.dict;
+ dict = local->dict;
ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
if (ret) {
@@ -1240,36 +2130,77 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize rsp buffer to dictionary");
+ goto out;
+ }
+ }
+
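+        /* The operation strings below are gf_strdup()'d so the single
+         * GF_FREE (rb_operation_str) in the exit path covers every branch. */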
switch (replace_op) {
case GF_REPLACE_OP_START:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick failed to start";
- else
- rb_operation_str = "replace-brick started successfully";
+ if (rsp.op_ret) {
+ rb_operation_str = gf_strdup ("replace-brick failed to"
+ " start");
+ } else {
+ ret = dict_get_str (rsp_dict, GF_REPLACE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get "
+ "\"replace-brick-id\" from dict");
+ goto out;
+ }
+ ret = gf_asprintf (&rb_operation_str,
+ "replace-brick started successfully"
+ "\nID: %s", task_id_str);
+ if (ret < 0)
+ goto out;
+ }
break;
case GF_REPLACE_OP_STATUS:
- status_reply = rsp.status;
- if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick status unknown";
- else
- rb_operation_str = status_reply;
+ if (rsp.op_ret || ret) {
+ rb_operation_str = gf_strdup ("replace-brick status "
+ "unknown");
+ } else {
+ ret = dict_get_str (rsp_dict, "status-reply",
+ &status_reply);
+ if (ret) {
+                                gf_log (frame->this->name, GF_LOG_ERROR,
+                                        "failed to get status");
+ goto out;
+ }
+
+ rb_operation_str = gf_strdup (status_reply);
+ }
break;
case GF_REPLACE_OP_PAUSE:
if (rsp.op_ret)
- rb_operation_str = "replace-brick pause failed";
+ rb_operation_str = gf_strdup ("replace-brick pause "
+ "failed");
else
- rb_operation_str = "replace-brick paused successfully";
+ rb_operation_str = gf_strdup ("replace-brick paused "
+ "successfully");
break;
case GF_REPLACE_OP_ABORT:
if (rsp.op_ret)
- rb_operation_str = "replace-brick abort failed";
+ rb_operation_str = gf_strdup ("replace-brick abort "
+ "failed");
else
- rb_operation_str = "replace-brick aborted successfully";
+ rb_operation_str = gf_strdup ("replace-brick aborted "
+ "successfully");
break;
case GF_REPLACE_OP_COMMIT:
@@ -1290,9 +2221,11 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick commit failed";
+ rb_operation_str = gf_strdup ("replace-brick commit "
+ "failed");
else
- rb_operation_str = "replace-brick commit successful";
+ rb_operation_str = gf_strdup ("replace-brick commit "
+ "successful");
break;
@@ -1303,141 +2236,140 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
}
if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) {
- rb_operation_str = rsp.op_errstr;
+ rb_operation_str = gf_strdup (rsp.op_errstr);
}
gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick");
- cli_out ("%s",
- rb_operation_str ? rb_operation_str : "Unknown operation");
-
- ret = rsp.op_ret;
-
-out:
- if (local) {
- dict_unref (local->u.replace_brick.dict);
- GF_FREE (local->u.replace_brick.volname);
- cli_local_wipe (local);
- }
-
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ snprintf (msg, sizeof (msg), "%s",
+ rb_operation_str ? rb_operation_str : "Unknown operation");
-static int
-gf_cli3_1_log_filename_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_log_filename_rsp rsp = {0,};
- int ret = -1;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_log_filename_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_replace_brick (replace_op, rsp_dict,
+ rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
goto out;
}
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to log filename");
-
- if (rsp.op_ret && strcmp (rsp.errstr, ""))
- cli_out ("%s", rsp.errstr);
+ if (rsp.op_ret)
+ cli_err ("volume replace-brick: failed: %s", msg);
else
- cli_out ("log filename : %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
-
+ cli_out ("volume replace-brick: success: %s", msg);
ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
-
-static int
-gf_cli3_1_log_locate_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_log_locate_rsp rsp = {0,};
- int ret = -1;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (frame)
+ frame->local = NULL;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_log_locate_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ if (local) {
+ dict_unref (local->dict);
+ cli_local_wipe (local);
}
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to log locate");
- cli_out ("log file location: %s", rsp.path);
+ if (rb_operation_str)
+ GF_FREE (rb_operation_str);
- ret = rsp.op_ret;
-
-out:
cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
return ret;
}
+
static int
-gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_log_rotate_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_log_rotate_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received resp to log rotate");
- if (rsp.op_ret && strcmp (rsp.errstr, ""))
- cli_out ("%s", rsp.errstr);
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
- cli_out ("log rotate %s", (rsp.op_ret) ? "unsuccessful":
- "successful");
+ snprintf (msg, sizeof (msg), "log rotate %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volLogRotate", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume log-rotate: failed: %s", msg);
+ else
+ cli_out ("volume log-rotate: success");
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+
return ret;
}
static int
-gf_cli3_1_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_sync_volume_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_sync_volume_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received resp to sync");
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "volume sync: failed: %s",
+ rsp.op_errstr);
else
- cli_out ("volume sync: %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ snprintf (msg, sizeof (msg), "volume sync: %s",
+ (rsp.op_ret) ? "failed": "success");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volSync", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("%s", msg);
+ else
+ cli_out ("%s", msg);
ret = rsp.op_ret;
out:
@@ -1446,12 +2378,12 @@ out:
}
int32_t
-gf_cli3_1_print_limit_list (char *volname, char *limit_list,
+gf_cli_print_limit_list (char *volname, char *limit_list,
char *op_errstr)
{
int64_t size = 0;
int64_t limit_value = 0;
- int32_t i, j, k;
+ int32_t i, j;
int32_t len = 0, ret = -1;
char *size_str = NULL;
char path [PATH_MAX] = {0, };
@@ -1459,6 +2391,7 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
char value [1024] = {0, };
char mountdir [] = "/tmp/mntXXXXXX";
char abspath [PATH_MAX] = {0, };
+ char *colon_ptr = NULL;
runner_t runner = {0,};
GF_VALIDATE_OR_GOTO ("cli", volname, out);
@@ -1469,7 +2402,7 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
len = strlen (limit_list);
if (len == 0) {
- cli_out ("%s", op_errstr?op_errstr:"quota limit not set ");
+ cli_err ("%s", op_errstr?op_errstr:"quota limit not set ");
goto out;
}
@@ -1495,7 +2428,7 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
len = strlen (limit_list);
if (len == 0) {
- cli_out ("quota limit not set ");
+ cli_err ("quota limit not set ");
goto unmount;
}
@@ -1506,19 +2439,16 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
"-----------------------");
while (i < len) {
j = 0;
- k = 0;
-
- while (limit_list [i] != ':') {
- path [k++] = limit_list [i++];
- }
- path [k] = '\0';
-
- i++; //skip ':'
while (limit_list [i] != ',' && limit_list [i] != '\0') {
- value [j++] = limit_list[i++];
+ path [j++] = limit_list[i++];
}
- value [j] = '\0';
+ path [j] = '\0';
+                /* here path[] contains both the path and its limit value */
+
+ colon_ptr = strrchr (path, ':');
+ *colon_ptr = '\0';
+ strcpy (value, ++colon_ptr);
snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
@@ -1561,64 +2491,155 @@ out:
}
int
-gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_quota_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_quota_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *limit_list = NULL;
+ int32_t type = 0;
+ char msg[1024] = {0,};
+ call_frame_t *frame = NULL;
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_quota_rsp);
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
if (rsp.op_ret &&
strcmp (rsp.op_errstr, "") == 0) {
- cli_out ("command unsuccessful %s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "command unsuccessful %s",
+ rsp.op_errstr);
+
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
goto out;
}
- if (rsp.type == GF_QUOTA_OPTION_TYPE_LIST) {
- if (rsp.limit_list) {
- gf_cli3_1_print_limit_list (rsp.volname,
- rsp.limit_list,
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get volname");
+
+ ret = dict_get_str (dict, "limit_list", &limit_list);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get limit_list");
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get type");
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list
+ (volname, limit_list, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+
+ }
+
+ if (limit_list) {
+ gf_cli_print_limit_list (volname,
+ limit_list,
rsp.op_errstr);
+ } else {
+ gf_log ("cli", GF_LOG_INFO, "Received resp to quota "
+ "command ");
+ if (rsp.op_errstr)
+ snprintf (msg, sizeof (msg), "%s",
+ rsp.op_errstr);
}
} else {
gf_log ("cli", GF_LOG_INFO, "Received resp to quota command ");
if (rsp.op_errstr)
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
- cli_out ("%s", "successful");
+ snprintf (msg, sizeof (msg), "successful");
}
-out:
- ret = rsp.op_ret;
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volQuota", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (strlen (msg) > 0) {
+ if (rsp.op_ret)
+ cli_err ("%s", msg);
+ else
+ cli_out ("%s", msg);
+ }
+ ret = rsp.op_ret;
+out:
cli_cmd_broadcast_response (ret);
+ if (dict)
+ dict_unref (dict);
+
+ free (rsp.dict.dict_val);
+
return ret;
}
int
-gf_cli3_1_getspec_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_getspec_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_getspec_rsp rsp = {0,};
- int ret = 0;
+ int ret = -1;
char *spec = NULL;
+ call_frame_t *frame = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "getspec failed");
goto out;
}
@@ -1642,20 +2663,30 @@ out:
}
int
-gf_cli3_1_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
pmap_port_by_brick_rsp rsp = {0,};
- int ret = 0;
+ int ret = -1;
char *spec = NULL;
+ call_frame_t *frame = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+                        "pmap_b2p failed");
goto out;
}
@@ -1673,13 +2704,12 @@ out:
int32_t
-gf_cli3_1_probe (call_frame_t *frame, xlator_t *this,
+gf_cli_probe (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_probe_req req = {0,};
+ gf_cli_req req = {{0,},};
int ret = 0;
dict_t *dict = NULL;
- char *hostname = NULL;
int port = 0;
if (!frame || !this || !data) {
@@ -1688,36 +2718,34 @@ gf_cli3_1_probe (call_frame_t *frame, xlator_t *this,
}
dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = CLI_GLUSTERD_PORT;
-
- req.hostname = hostname;
- req.port = port;
+ if (ret) {
+ ret = dict_set_int32 (dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROBE, NULL,
- this, gf_cli3_1_probe_cbk,
- (xdrproc_t)xdr_gf1_cli_probe_req);
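+        /* cli_to_glusterd() serializes the dict into req.dict and submits
+         * the request to glusterd with the given callback. */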
+ ret = cli_to_glusterd (&req, frame, gf_cli_probe_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROBE, this, cli_rpc_prog, NULL);
out:
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
return ret;
}
int32_t
-gf_cli3_1_deprobe (call_frame_t *frame, xlator_t *this,
+gf_cli_deprobe (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_deprobe_req req = {0,};
+ gf_cli_req req = {{0,},};
int ret = 0;
dict_t *dict = NULL;
- char *hostname = NULL;
int port = 0;
+ int flags = 0;
if (!frame || !this || !data) {
ret = -1;
@@ -1725,53 +2753,70 @@ gf_cli3_1_deprobe (call_frame_t *frame, xlator_t *this,
}
dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
-
ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = CLI_GLUSTERD_PORT;
+ if (ret) {
+ ret = dict_set_int32 (dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
- req.hostname = hostname;
- req.port = port;
+ ret = dict_get_int32 (dict, "flags", &flags);
+ if (ret) {
+ ret = dict_set_int32 (dict, "flags", 0);
+ if (ret)
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEPROBE, NULL,
- this, gf_cli3_1_deprobe_cbk,
- (xdrproc_t)xdr_gf1_cli_deprobe_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_deprobe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEPROBE, this, cli_rpc_prog, NULL);
out:
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
return ret;
}
int32_t
-gf_cli3_1_list_friends (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_list_friends (call_frame_t *frame, xlator_t *this,
+ void *data)
{
gf1_cli_peer_list_req req = {0,};
int ret = 0;
+ unsigned long flags = 0;
if (!frame || !this) {
ret = -1;
goto out;
}
- req.flags = GF_CLI_LIST_ALL;
+ GF_ASSERT (frame->local == NULL);
+ flags = (long)data;
+ req.flags = flags;
+ frame->local = (void*)flags;
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_LIST_FRIENDS, NULL,
- this, gf_cli3_1_list_friends_cbk,
+ this, gf_cli_list_friends_cbk,
(xdrproc_t) xdr_gf1_cli_peer_list_req);
out:
+ if (ret) {
+ /*
+ * If everything goes fine, gf_cli_list_friends_cbk()
+                 * [invoked through cli_cmd_submit()] resets the
+ * frame->local to NULL. In case cli_cmd_submit()
+ * fails in between, RESET frame->local here.
+ */
+ frame->local = NULL;
+ }
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_get_next_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
@@ -1785,37 +2830,58 @@ gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
}
ctx = data;
+ local = frame->local;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_begin (local, 0, 0, "");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
+ goto out;
+ }
+ }
- ret = gf_cli3_1_get_volume (frame, this, data);
+ ret = gf_cli_get_volume (frame, this, data);
- local = frame->local;
- if (!local || !local->u.get_vol.volname) {
- cli_out ("No volumes present");
+ if (!local || !local->get_vol.volname) {
+ if ((global_state->mode & GLUSTER_MODE_XML))
+ goto end_xml;
+
+ cli_err ("No volumes present");
goto out;
}
- ctx->volname = local->u.get_vol.volname;
+
+ ctx->volname = local->get_vol.volname;
while (ctx->volname) {
- ret = gf_cli3_1_get_volume (frame, this, ctx);
+ ret = gf_cli_get_volume (frame, this, ctx);
if (ret)
goto out;
- ctx->volname = local->u.get_vol.volname;
+ ctx->volname = local->get_vol.volname;
+ }
+
+end_xml:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_end (local);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
}
out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_get_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
cli_cmd_volume_get_ctx_t *ctx = NULL;
dict_t *dict = NULL;
+ int32_t flags = 0;
if (!frame || !this || !data) {
ret = -1;
@@ -1823,7 +2889,6 @@ gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
}
ctx = data;
- req.flags = ctx->flags;
dict = dict_new ();
if (!dict)
@@ -1835,146 +2900,151 @@ gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
goto out;
}
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ flags = ctx->flags;
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to set flags");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_GET_VOLUME, NULL,
- this, gf_cli3_1_get_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_get_vol_req);
+ this, gf_cli_get_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req);
out:
+ if (dict)
+ dict_unref (dict);
+
+ GF_FREE (req.dict.dict_val);
+
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
-
int32_t
-gf_cli3_1_create_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli3_1_uuid_get (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- gf1_cli_create_vol_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
- cli_local_t *local = NULL;
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
+ if (!frame || !this || !data) {
ret = -1;
goto out;
}
- dict = dict_ref ((dict_t *)data);
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli3_1_uuid_get_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_GET, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &req.volname);
+int32_t
+gf_cli3_1_uuid_reset (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
goto out;
+ }
- ret = dict_get_int32 (dict, "type", (int32_t *)&req.type);
-
- if (ret)
- goto out;
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli3_1_uuid_reset_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_RESET, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- ret = dict_get_int32 (dict, "count", &req.count);
- if (ret)
- goto out;
+int32_t
+gf_cli_create_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = dict_allocate_and_serialize (dict,
- &req.bricks.bricks_val,
- (size_t *)&req.bricks.bricks_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
+ if (!frame || !this || !data) {
+ ret = -1;
goto out;
}
- local = cli_local_get ();
-
- if (local) {
- local->u.create_vol.dict = dict_ref (dict);
- frame->local = local;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CREATE_VOLUME, NULL,
- this, gf_cli3_1_create_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_create_vol_req);
-
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli_create_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_CREATE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (dict)
- dict_unref (dict);
-
- if (req.bricks.bricks_val) {
- GF_FREE (req.bricks.bricks_val);
- }
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_delete_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_delete_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_delete_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
+ dict_t *dict = NULL;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- local = cli_local_get ();
-
- if (local) {
- local->u.delete_vol.volname = data;
- frame->local = local;
- }
-
- req.volname = data;
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DELETE_VOLUME, NULL,
- this, gf_cli3_1_delete_volume_cbk,
- (xdrproc_t)xdr_gf1_cli_delete_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_delete_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DELETE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_start_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_start_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_start_vol_req *req = NULL;
+ gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
+ dict_t *dict = NULL;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- req = data;
- local = cli_local_get ();
-
- if (local) {
- local->u.start_vol.volname = req->volname;
- local->u.start_vol.flags = req->flags;
- frame->local = local;
- }
+ dict = data;
- ret = cli_cmd_submit (req, frame, cli_rpc_prog,
- GLUSTER_CLI_START_VOLUME, NULL,
- this, gf_cli3_1_start_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_start_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_start_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_START_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -1983,31 +3053,24 @@ out:
}
int32_t
-gf_cli3_1_stop_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_stop_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_stop_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
+ dict_t *dict = data;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- req = *((gf1_cli_stop_vol_req*)data);
- local = cli_local_get ();
-
- if (local) {
- local->u.stop_vol.volname = req.volname;
- local->u.stop_vol.flags = req.flags;
- frame->local = local;
- }
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STOP_VOLUME, NULL,
- this, gf_cli3_1_stop_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_stop_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_stop_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STOP_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2016,14 +3079,11 @@ out:
}
int32_t
-gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_defrag_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_defrag_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
- char *volname = NULL;
- char *cmd_str = NULL;
dict_t *dict = NULL;
if (!frame || !this || !data) {
@@ -2033,55 +3093,10 @@ gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- gf_log ("", GF_LOG_DEBUG, "error");
-
- ret = dict_get_str (dict, "command", &cmd_str);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "error");
- goto out;
- }
-
- if (strcmp (cmd_str, "start") == 0) {
- req.cmd = GF_DEFRAG_CMD_START;
- ret = dict_get_str (dict, "start-type", &cmd_str);
- if (!ret) {
- if (strcmp (cmd_str, "fix-layout") == 0) {
- req.cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
- }
- if (strcmp (cmd_str, "migrate-data") == 0) {
- req.cmd = GF_DEFRAG_CMD_START_MIGRATE_DATA;
- }
- if (strcmp (cmd_str, "migrate-data-force") == 0) {
- req.cmd = GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE;
- }
- }
- goto done;
- }
- if (strcmp (cmd_str, "stop") == 0) {
- req.cmd = GF_DEFRAG_CMD_STOP;
- goto done;
- }
- if (strcmp (cmd_str, "status") == 0) {
- req.cmd = GF_DEFRAG_CMD_STATUS;
- }
-
-done:
- local = cli_local_get ();
-
- if (local) {
- local->u.defrag_vol.volname = gf_strdup (volname);
- local->u.defrag_vol.cmd = req.cmd;
- frame->local = local;
- }
-
- req.volname = volname;
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_1_defrag_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_defrag_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_defrag_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2090,10 +3105,10 @@ out:
}
int32_t
-gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_rename_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_rename_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
@@ -2104,20 +3119,20 @@ gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "old-volname", &req.old_volname);
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to serialize the data");
- if (ret)
goto out;
+ }
- ret = dict_get_str (dict, "new-volname", &req.new_volname);
-
- if (ret)
- goto out;
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_RENAME_VOLUME, NULL,
- this, gf_cli3_1_rename_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_rename_vol_req);
+ this, gf_cli_rename_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2126,10 +3141,10 @@ out:
}
int32_t
-gf_cli3_1_reset_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_reset_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_reset_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
@@ -2140,36 +3155,21 @@ gf_cli3_1_reset_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
-
- if (ret)
- goto out;
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_RESET_VOLUME, NULL,
- this, gf_cli3_1_reset_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_reset_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_reset_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_set_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_set_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_set_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
@@ -2180,24 +3180,10 @@ gf_cli3_1_set_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
-
- if (ret)
- goto out;
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_SET_VOLUME, NULL,
- this, gf_cli3_1_set_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_set_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_set_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2206,12 +3192,14 @@ out:
}
int32_t
-gf_cli3_1_add_brick (call_frame_t *frame, xlator_t *this,
+gf_cli_add_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_add_brick_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
+ char *volname = NULL;
+ int32_t count = 0;
if (!frame || !this || !data) {
ret = -1;
@@ -2220,50 +3208,39 @@ gf_cli3_1_add_brick (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
+ ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
- ret = dict_get_int32 (dict, "count", &req.count);
+ ret = dict_get_int32 (dict, "count", &count);
if (ret)
goto out;
-
- ret = dict_allocate_and_serialize (dict,
- &req.bricks.bricks_val,
- (size_t *)&req.bricks.bricks_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_ADD_BRICK, NULL,
- this, gf_cli3_1_add_brick_cbk,
- (xdrproc_t) xdr_gf1_cli_add_brick_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.bricks.bricks_val) {
- GF_FREE (req.bricks.bricks_val);
- }
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
+gf_cli_remove_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_remove_brick_req req = {0,};
- gf1_cli_defrag_vol_req status_req = {0,};
+        gf_cli_req                      req = {{0,}};
+        gf_cli_req                      status_req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
int32_t command = 0;
char *volname = NULL;
+ dict_t *req_dict = NULL;
+ int32_t cmd = 0;
if (!frame || !this || !data) {
ret = -1;
@@ -2276,102 +3253,97 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
if (ret)
goto out;
- ret = dict_get_int32 (dict, "count", &req.count);
- if (ret)
- goto out;
-
ret = dict_get_int32 (dict, "command", &command);
if (ret)
goto out;
- if (command != GF_OP_CMD_STATUS) {
- req.volname = volname;
+ if ((command != GF_OP_CMD_STATUS) &&
+ (command != GF_OP_CMD_STOP)) {
- ret = dict_allocate_and_serialize (dict,
- &req.bricks.bricks_val,
- (size_t *)&req.bricks.bricks_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REMOVE_BRICK, this,
+ cli_rpc_prog, NULL);
+ } else {
+ /* Need rebalance status to be sent :-) */
+ req_dict = dict_new ();
+ if (!req_dict) {
+ ret = -1;
goto out;
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REMOVE_BRICK, NULL,
- this, gf_cli3_1_remove_brick_cbk,
- (xdrproc_t) xdr_gf1_cli_remove_brick_req);
- } else {
- /* Need rebalance status to e sent :-) */
- status_req.volname = volname;
- status_req.cmd = GF_DEFRAG_CMD_STATUS;
+ ret = dict_set_str (req_dict, "volname", volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict");
+ goto out;
+ }
- ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_remove_brick_status_cbk,
- (xdrproc_t) xdr_gf1_cli_defrag_vol_req);
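+                /* status/stop of a remove-brick is serviced by the rebalance
+                 * framework, so translate it to the equivalent defrag command
+                 * and send it as a GLUSTER_CLI_DEFRAG_VOLUME request. */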
+ if (command == GF_OP_CMD_STATUS)
+ cmd |= GF_DEFRAG_CMD_STATUS;
+ else
+ cmd |= GF_DEFRAG_CMD_STOP;
- }
+ ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict");
+ goto out;
+ }
+
+ ret = cli_to_glusterd (&status_req, frame,
+ gf_cli3_remove_brick_status_cbk,
+ (xdrproc_t) xdr_gf_cli_req, req_dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this,
+ cli_rpc_prog, NULL);
+
+ }
out:
+ if (req_dict)
+ dict_unref (req_dict);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.bricks.bricks_val) {
- GF_FREE (req.bricks.bricks_val);
- }
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (status_req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
+gf_cli_replace_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_replace_brick_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
dict_t *dict = NULL;
char *src_brick = NULL;
char *dst_brick = NULL;
+ char *volname = NULL;
+ int32_t op = 0;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- dict = data;
-
- local = cli_local_get ();
- if (!local) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
-
- local->u.replace_brick.dict = dict_ref (dict);
- frame->local = local;
+ dict = data;
- ret = dict_get_int32 (dict, "operation", (int32_t *)&req.op);
+ ret = dict_get_int32 (dict, "operation", &op);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"dict_get on operation failed");
goto out;
}
- ret = dict_get_str (dict, "volname", &req.volname);
+ ret = dict_get_str (dict, "volname", &volname);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"dict_get on volname failed");
goto out;
}
- local->u.replace_brick.volname = gf_strdup (req.volname);
- if (!local->u.replace_brick.volname) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
ret = dict_get_str (dict, "src-brick", &src_brick);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -2387,40 +3359,29 @@ gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
}
gf_log (this->name, GF_LOG_DEBUG,
- "Recevied command replace-brick %s with "
+ "Received command replace-brick %s with "
"%s with operation=%d", src_brick,
- dst_brick, req.op);
-
-
- ret = dict_allocate_and_serialize (dict,
- &req.bricks.bricks_val,
- (size_t *)&req.bricks.bricks_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ dst_brick, op);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REPLACE_BRICK, NULL,
- this, gf_cli3_1_replace_brick_cbk,
- (xdrproc_t) xdr_gf1_cli_replace_brick_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_replace_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REPLACE_BRICK, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.bricks.bricks_val) {
- GF_FREE (req.bricks.bricks_val);
- }
+ GF_FREE (req.dict.dict_val);
return ret;
}
+
int32_t
-gf_cli3_1_log_filename (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_log_rotate (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- gf1_cli_log_filename_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
@@ -2431,67 +3392,25 @@ gf_cli3_1_log_filename (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, "brick", &req.brick);
- if (ret)
- req.brick = "";
-
- ret = dict_get_str (dict, "path", &req.path);
- if (ret)
- goto out;
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_FILENAME, NULL,
- this, gf_cli3_1_log_filename_cbk,
- (xdrproc_t) xdr_gf1_cli_log_filename_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_log_rotate_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_LOG_ROTATE, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
-
-static int
-gf_cli3_1_log_level_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_log_level_rsp rsp = {0,};
- int ret = -1;
-
- if (req->rpc_status == -1)
- goto out;
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_log_level_rsp);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "log level response error");
- goto out;
- }
-
- gf_log ("cli", GF_LOG_DEBUG, "Received response to log level cmd");
-
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- else
- cli_out ("log level set: %s", (rsp.op_ret) ? "unsuccessful" :
- "successful");
-
- ret = rsp.op_ret;
-
- out:
- cli_cmd_broadcast_response (ret);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_log_level (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_sync_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- gf1_cli_log_level_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ int ret = 0;
+ gf_cli_req req = {{0,}};
+ dict_t *dict = NULL;
if (!frame || !this || !data) {
ret = -1;
@@ -2500,36 +3419,26 @@ gf_cli3_1_log_level (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, "xlator", &req.xlator);
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, "loglevel", &req.loglevel);
- if (ret)
- goto out;
+ ret = cli_to_glusterd (&req, frame, gf_cli_sync_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYNC_VOLUME, this, cli_rpc_prog,
+ NULL);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_LEVEL, NULL,
- this, gf_cli3_1_log_level_cbk,
- (xdrproc_t) xdr_gf1_cli_log_level_req);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE (req.dict.dict_val);
- out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
return ret;
}
-
int32_t
-gf_cli3_1_log_locate (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_getspec (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- gf1_cli_log_locate_req req = {0,};
+ gf_getspec_req req = {0,};
int ret = 0;
- dict_t *dict = NULL;
+ dict_t *dict = NULL;
+ dict_t *op_dict = NULL;
if (!frame || !this || !data) {
ret = -1;
@@ -2538,117 +3447,59 @@ gf_cli3_1_log_locate (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
+ ret = dict_get_str (dict, "volid", &req.key);
if (ret)
goto out;
- ret = dict_get_str (dict, "brick", &req.brick);
- if (ret)
- req.brick = "";
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_LOCATE, NULL,
- this, gf_cli3_1_log_locate_cbk,
- (xdrproc_t) xdr_gf1_cli_log_locate_req);
-
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
-}
-
-int32_t
-gf_cli3_1_log_rotate (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf1_cli_log_locate_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
+ op_dict = dict_new ();
+        if (!op_dict) {
ret = -1;
goto out;
}
- dict = data;
-
- ret = dict_get_str (dict, "volname", &req.volname);
- if (ret)
- goto out;
-
- ret = dict_get_str (dict, "brick", &req.brick);
- if (ret)
- req.brick = "";
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_ROTATE, NULL,
- this, gf_cli3_1_log_rotate_cbk,
- (xdrproc_t) xdr_gf1_cli_log_rotate_req);
-
-
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
-}
-
-int32_t
-gf_cli3_1_sync_volume (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- int ret = 0;
-
- if (!frame || !this || !data) {
- ret = -1;
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (op_dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
goto out;
}
- ret = cli_cmd_submit ((gf1_cli_sync_volume_req*)data, frame,
- cli_rpc_prog, GLUSTER_CLI_SYNC_VOLUME,
- NULL, this, gf_cli3_1_sync_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_sync_volume_req);
-
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
-}
-
-int32_t
-gf_cli3_1_getspec (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_getspec_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
+ ret = dict_set_int32 (op_dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
goto out;
}
- dict = data;
-
- ret = dict_get_str (dict, "volid", &req.key);
- if (ret)
+ ret = dict_allocate_and_serialize (op_dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
goto out;
+ }
ret = cli_cmd_submit (&req, frame, &cli_handshake_prog,
GF_HNDSK_GETSPEC, NULL,
- this, gf_cli3_1_getspec_cbk,
+ this, gf_cli_getspec_cbk,
(xdrproc_t) xdr_gf_getspec_req);
out:
+ if (op_dict) {
+ dict_unref(op_dict);
+ }
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_quota (call_frame_t *frame, xlator_t *this,
+gf_cli_quota (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_quota_req req = {0,};
+ gf_cli_req req = {{0,}};
int ret = 0;
dict_t *dict = NULL;
@@ -2659,32 +3510,18 @@ gf_cli3_1_quota (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
+ ret = cli_to_glusterd (&req, frame, gf_cli_quota_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_QUOTA, this, cli_rpc_prog, NULL);
- if (ret)
- goto out;
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_QUOTA, NULL,
- this, gf_cli3_1_quota_cbk,
- (xdrproc_t) xdr_gf1_cli_quota_req);
-
- GF_FREE (req.dict.dict_val);
out:
+ GF_FREE (req.dict.dict_val);
+
return ret;
}
int32_t
-gf_cli3_1_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
{
pmap_port_by_brick_req req = {0,};
int ret = 0;
@@ -2703,7 +3540,7 @@ gf_cli3_1_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, &cli_pmap_prog,
GF_PMAP_PORTBYBRICK, NULL,
- this, gf_cli3_1_pmap_b2p_cbk,
+ this, gf_cli_pmap_b2p_cbk,
(xdrproc_t) xdr_pmap_port_by_brick_req);
out:
@@ -2713,7 +3550,7 @@ out:
}
static int
-gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_fsm_log_rsp rsp = {0,};
@@ -2733,14 +3570,15 @@ gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
if (rsp.op_ret) {
if (strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- cli_out ("fsm log unsuccessful");
+ cli_err ("%s", rsp.op_errstr);
+ cli_err ("fsm log unsuccessful");
ret = rsp.op_ret;
goto out;
}
@@ -2756,7 +3594,7 @@ gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
&dict);
if (ret) {
- cli_out ("bad response");
+ cli_err ("bad response");
goto out;
}
@@ -2764,7 +3602,7 @@ gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
if (tr_count)
cli_out("number of transitions: %d", tr_count);
else
- cli_out("No transitions");
+ cli_err("No transitions");
for (i = 0; i < tr_count; i++) {
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "log%d-old-state", i);
@@ -2803,7 +3641,7 @@ out:
}
int32_t
-gf_cli3_1_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf1_cli_fsm_log_req req = {0,};
@@ -2817,7 +3655,7 @@ gf_cli3_1_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
req.name = data;
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_FSM_LOG, NULL,
- this, gf_cli3_1_fsm_log_cbk,
+ this, gf_cli_fsm_log_cbk,
(xdrproc_t) xdr_gf1_cli_fsm_log_req);
out:
@@ -2827,14 +3665,17 @@ out:
}
int
-gf_cli3_1_gsync_config_command (dict_t *dict)
+gf_cli_gsync_config_command (dict_t *dict)
{
runner_t runner = {0,};
char *subop = NULL;
char *gwd = NULL;
char *slave = NULL;
+ char *confpath = NULL;
char *master = NULL;
char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
if (dict_get_str (dict, "subop", &subop) != 0)
return -1;
@@ -2853,9 +3694,17 @@ gf_cli3_1_gsync_config_command (dict_t *dict)
if (dict_get_str (dict, "op_name", &op_name) != 0)
op_name = NULL;
+ ret = dict_get_str (dict, "conf_path", &confpath);
+ if (!confpath) {
+ ret = snprintf (conf_path, sizeof(conf_path) - 1,
+ "%s/"GEOREP"/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
runinit (&runner);
runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gwd);
+ runner_argprintf (&runner, "%s", confpath);
if (master)
runner_argprintf (&runner, ":%s", master);
runner_add_arg (&runner, slave);
@@ -2867,82 +3716,667 @@ gf_cli3_1_gsync_config_command (dict_t *dict)
}
int
-gf_cli3_1_gsync_out_status (dict_t *dict)
+gf_cli_fetch_gsyncd_status_values (char *status,
+ gf_cli_gsync_status_t *sts_val)
{
- int gsync_count = 0;
- int i = 0;
- int ret = 0;
- char mst[PATH_MAX] = {0, };
- char slv[PATH_MAX]= {0, };
- char sts[PATH_MAX] = {0, };
- char hyphens[81] = {0, };
- char *mst_val = NULL;
- char *slv_val = NULL;
- char *sts_val = NULL;
-
- cli_out ("%-20s %-50s %-10s", "MASTER", "SLAVE", "STATUS");
-
- for (i=0; i<sizeof(hyphens)-1; i++)
- hyphens[i] = '-';
+ int32_t ret = -1;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
- cli_out ("%s", hyphens);
+ if (!status || !sts_val) {
+ gf_log ("", GF_LOG_ERROR, "status or sts_val is null");
+ goto out;
+ }
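+        /* gsyncd reports health on the first line, followed by ';'-separated
+         * "Key=Value" pairs; any field left unset falls back to "N/A". */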
+ tmp = strtok_r (status, "\n", &save_ptr);
- ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (tmp)
+ sts_val->health = gf_strdup (tmp);
+
+ while (tmp) {
+ key = strtok_r (tmp, "=", &value);
+
+ if ((key) && (!strcmp(key, "Uptime")))
+ sts_val->uptime = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "FilesSyncd")))
+ sts_val->files_syncd = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "FilesPending")))
+ sts_val->files_pending = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "BytesPending"))) {
+ value = gf_uint64_2human_readable(atol(value));
+ sts_val->bytes_pending = gf_strdup (value);
+ }
+
+ if ((key) && (!strcmp(key, "DeletesPending")))
+ sts_val->deletes_pending = gf_strdup (value);
+
+ tmp = strtok_r (NULL, ";", &save_ptr);
+ }
+
+ if (sts_val->health)
+ ret = 0;
+
+ if (!sts_val->uptime)
+ sts_val->uptime = gf_strdup ("N/A");
+
+ if (!sts_val->files_syncd)
+ sts_val->files_syncd = gf_strdup ("N/A");
+
+ if (!sts_val->files_pending)
+ sts_val->files_pending = gf_strdup ("N/A");
+
+ if (!sts_val->bytes_pending)
+ sts_val->bytes_pending = gf_strdup ("N/A");
+
+ if (!sts_val->deletes_pending)
+ sts_val->deletes_pending = gf_strdup ("N/A");
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
+}
+
+char*
+get_struct_variable (int mem_num, gf_cli_gsync_status_t *sts_val)
+{
+ switch (mem_num) {
+ case 0: return (sts_val->node);
+ case 1: return (sts_val->master);
+ case 2: return (sts_val->slave);
+ case 3: return (sts_val->health);
+ case 4: return (sts_val->uptime);
+ case 5: return (sts_val->files_syncd);
+ case 6: return (sts_val->files_pending);
+ case 7: return (sts_val->bytes_pending);
+ case 8: return (sts_val->deletes_pending);
+ default:
+ goto out;
+ }
+
+out:
+ return NULL;
+}
+
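+/* Render the geo-replication status table. Column widths come from the
+ * caller-computed "spacing" array; master/slave columns are blanked for
+ * "status detail", and the detail columns are blanked for plain "status". */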
+int
+gf_cli_print_status (char **title_values,
+ gf_cli_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count,
+ int number_of_fields, int is_detail)
+{
+ int indents = 0;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int total_spacing = 0;
+ char **output_values = NULL;
+ char *tmp = NULL;
+ char *hyphens = NULL;
+ char heading[PATH_MAX] = {0, };
+ char indent_spaces[PATH_MAX] = {0, };
+
+ /* calculating spacing for hyphens */
+ for (i = 0; i < number_of_fields; i++) {
+ /* Suppressing master and slave output for status detail */
+ if ((is_detail) && ((i == 1) || (i == 2))) {
+ total_spacing++;
+ continue;
+ } else if ((!is_detail) && (i > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ continue;
+ }
+ spacing[i] += 3; /* Adding extra space to
+ distinguish between fields */
+ total_spacing += spacing[i];
+ }
+ total_spacing += 4; /* For the spacing between the fields */
+
+ /* char pointers for each field */
+ output_values = GF_CALLOC (number_of_fields, sizeof (char *),
+ gf_common_mt_char);
+ if (!output_values) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < number_of_fields; i++) {
+ output_values[i] = GF_CALLOC (spacing[i] + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!output_values[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ hyphens = GF_CALLOC (total_spacing + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!hyphens) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(heading, sizeof(heading), "MASTER: %s SLAVE: %s",
+ sts_vals[0]->master, sts_vals[0]->slave);
if (ret) {
- gf_log ("cli", GF_LOG_INFO, "No active geo-replication sessions"
- "present for the selected");
+ if (ret < sizeof(heading))
+ heading[ret] = '\0';
+ else
+ heading[sizeof(heading) - 1] = '\0';
ret = 0;
+ } else {
+ ret = -1;
goto out;
}
- for (i = 1; i <= gsync_count; i++) {
- snprintf (mst, sizeof(mst), "master%d", i);
- snprintf (slv, sizeof(slv), "slave%d", i);
- snprintf (sts, sizeof(sts), "status%d", i);
+ if (is_detail) {
+ cli_out (" ");
+ if (strlen(heading) > total_spacing)
+ cli_out ("%s", heading);
+ else {
+ /* Printing the heading with centre justification */
+ indents = (total_spacing - strlen(heading)) / 2;
+ memset (indent_spaces, ' ', indents);
+ indent_spaces[indents] = '\0';
+ ret = snprintf (hyphens, total_spacing, "%s%s",
+ indent_spaces, heading);
+ if (ret) {
+ hyphens[ret] = '\0';
+ cli_out ("%s", hyphens);
+ ret = 0;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ cli_out (" ");
+ }
+
+        /* Set the column titles ("NODE", "MASTER", etc.) from title_values[]
+           and print the title row */
+ for (j = 0; j < number_of_fields; j++) {
+ /* Suppressing master and slave output for status detail */
+ if ((is_detail) && ((j == 1) || (j == 2))) {
+ output_values[j][0] = '\0';
+ continue;
+ } else if ((!is_detail) && (j > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ memset (output_values[j], ' ', spacing[j]);
+ memcpy (output_values[j], title_values[j],
+ strlen(title_values[j]));
+ output_values[j][spacing[j]] = '\0';
+ }
+ cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8]);
+
+ /* setting and printing the hyphens */
+ memset (hyphens, '-', total_spacing);
+ hyphens[total_spacing] = '\0';
+ cli_out ("%s", hyphens);
+
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 0; j < number_of_fields; j++) {
+ /* Suppressing master and slave output for
+ * status detail */
+ if ((is_detail) && ((j == 1) || (j == 2))) {
+ output_values[j][0] = '\0';
+ continue;
+ } else if ((!is_detail) && (j > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log ("", GF_LOG_ERROR,
+ "struct member empty.");
+ ret = -1;
+ goto out;
+ }
+ memset (output_values[j], ' ', spacing[j]);
+ memcpy (output_values[j], tmp, strlen (tmp));
+ output_values[j][spacing[j]] = '\0';
+ }
+
+ cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8]);
+ }
+
+out:
+ if (output_values) {
+ for (i = 0; i < number_of_fields; i++) {
+ if (output_values[i])
+ GF_FREE (output_values[i]);
+ }
+ GF_FREE (output_values);
+ }
- ret = dict_get_str (dict, mst, &mst_val);
+ if (hyphens)
+ GF_FREE (hyphens);
+
+ return ret;
+}
+
+int
+gf_cli_read_status_data (dict_t *dict,
+ gf_cli_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count,
+ int number_of_fields)
+{
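+        /* Pulls the node/master/slave/status strings for every session out of
+         * the dict and records the widest value seen for each column in
+         * spacing[]. */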
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ char mst[PATH_MAX] = {0, };
+ char slv[PATH_MAX] = {0, };
+ char sts[PATH_MAX] = {0, };
+ char nds[PATH_MAX] = {0, };
+ char *status = NULL;
+ char *tmp = NULL;
+
+ /* Storing per node status info in each object */
+ for (i = 0; i < gsync_count; i++) {
+ snprintf (nds, sizeof(nds), "node%d", i + 1);
+ snprintf (mst, sizeof(mst), "master%d", i + 1);
+ snprintf (slv, sizeof(slv), "slave%d", i + 1);
+ snprintf (sts, sizeof(sts), "status%d", i + 1);
+
+ /* Fetching the values from dict, and calculating
+ the max length for each field */
+ ret = dict_get_str (dict, nds, &(sts_vals[i]->node));
if (ret)
goto out;
- ret = dict_get_str (dict, slv, &slv_val);
+ ret = dict_get_str (dict, mst, &(sts_vals[i]->master));
if (ret)
goto out;
- ret = dict_get_str (dict, sts, &sts_val);
+ ret = dict_get_str (dict, slv, &(sts_vals[i]->slave));
if (ret)
goto out;
- cli_out ("%-20s %-50s %-10s", mst_val,
- slv_val, sts_val);
+ ret = dict_get_str (dict, sts, &status);
+ if (ret)
+ goto out;
+
+                /* Parse health, uptime and the other counters from the
+                   status string into sts_vals[i] */
+ ret = gf_cli_fetch_gsyncd_status_values (status, sts_vals[i]);
+ if (ret)
+ goto out;
+ for (j = 0; j < number_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log ("", GF_LOG_ERROR,
+ "struct member empty.");
+ ret = -1;
+ goto out;
+ }
+ if (strlen (tmp) > spacing[j])
+ spacing[j] = strlen (tmp);
+ }
}
- out:
+out:
return ret;
+}
+
+int
+gf_cli_gsync_status_output (dict_t *dict, int status_detail)
+{
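+        /* Entry point for "geo-replication ... status [detail]" output:
+         * allocates one gf_cli_gsync_status_t per reporting node, fills it
+         * from the dict and hands the array to gf_cli_print_status(). */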
+ int gsync_count = 0;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int spacing[10] = {0};
+ int num_of_fields = 9;
+ char errmsg[1024] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ char *tmp = NULL;
+ char *title_values[] = {"NODE", "MASTER", "SLAVE",
+ "HEALTH", "UPTIME",
+ "FILES SYNCD",
+ "FILES PENDING",
+ "BYTES PENDING",
+ "DELETES PENDING"};
+ gf_cli_gsync_status_t **sts_vals = NULL;
+
+ /* Checks if any session is active or not */
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (ret) {
+ ret = dict_get_str (dict, "master", &master);
+
+ ret = dict_get_str (dict, "slave", &slave);
+ if (master) {
+ if (slave)
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions between %s"
+ " and %s", master, slave);
+ else
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions for %s",
+ master);
+ } else
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions");
+
+ gf_log ("cli", GF_LOG_INFO, "%s", errmsg);
+ cli_out ("%s", errmsg);
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; i < num_of_fields; i++)
+ spacing[i] = strlen(title_values[i]);
+
+        /* gsync_count = number of nodes reporting status;
+           each sts_vals[i] holds the output of one node */
+ sts_vals = GF_CALLOC (gsync_count, sizeof (gf_cli_gsync_status_t *),
+ gf_common_mt_char);
+ if (!sts_vals) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < gsync_count; i++) {
+ sts_vals[i] = GF_CALLOC (1, sizeof (gf_cli_gsync_status_t),
+ gf_common_mt_char);
+ if (!sts_vals[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = gf_cli_read_status_data (dict, sts_vals, spacing,
+ gsync_count, num_of_fields);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read status data");
+ goto out;
+ }
+
+ ret = gf_cli_print_status (title_values, sts_vals, spacing, gsync_count,
+ num_of_fields, status_detail);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to print status output");
+ goto out;
+ }
+
+out:
+ if (sts_vals) {
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 3; j < num_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (tmp)
+ GF_FREE (tmp);
+ }
+ }
+ GF_FREE (sts_vals);
+ }
+
+ return ret;
+}
+
+static int32_t
+write_contents_to_common_pem_file (dict_t *dict, int output_count)
+{
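+        /* Concatenates the per-node "output_<i>" public keys from the dict
+         * into <glusterd_workdir>/geo-replication/common_secret.pem.pub,
+         * one key per line. */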
+ char *workdir = NULL;
+ char common_pem_file[PATH_MAX] = "";
+ char *output = NULL;
+ char output_name[PATH_MAX] = "";
+        int bytes_written = 0;
+ int fd = -1;
+ int ret = -1;
+ int i = -1;
+
+ ret = dict_get_str (dict, "glusterd_workdir", &workdir);
+ if (ret || !workdir) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch workdir");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (common_pem_file, sizeof(common_pem_file),
+ "%s/geo-replication/common_secret.pem.pub",
+ workdir);
+
+ unlink (common_pem_file);
+
+ fd = open (common_pem_file, O_WRONLY | O_CREAT, 0600);
+ if (fd == -1) {
+ gf_log ("", GF_LOG_ERROR, "Failed to open %s"
+ " Error : %s", common_pem_file,
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", i);
+ ret = dict_get_str (dict, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get %s.",
+ output_name);
+ cli_out ("Unable to fetch output.");
+ }
+ if (output) {
+                        bytes_written = write (fd, output, strlen(output));
+                        if (bytes_written != strlen(output)) {
+ gf_log ("", GF_LOG_ERROR, "Failed to write "
+ "to %s", common_pem_file);
+ ret = -1;
+ goto out;
+ }
+ /* Adding the new line character */
+                        bytes_written = write (fd, "\n", strlen("\n"));
+                        if (bytes_written != strlen("\n")) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to add new line char");
+ ret = -1;
+ goto out;
+ }
+ output = NULL;
+ }
+ }
+
+ cli_out ("Common secret pub file present at %s", common_pem_file);
+ ret = 0;
+out:
+        if (fd != -1)
+ close (fd);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_sys_exec_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
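+        /* Prints the result of a system-exec request: each "output_<i>"
+         * entry in the response dict is either written to the common pem
+         * file (for gsec_create) or echoed to the CLI. */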
+ int ret = -1;
+ int output_count = -1;
+ int i = -1;
+ char *output = NULL;
+ char *command = NULL;
+ char output_name[PATH_MAX] = "";
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ "Command failed.");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "output_count", &output_count);
+ if (ret) {
+ cli_out ("Command executed successfully.");
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "command", &command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ if (!strcmp (command, "gsec_create")) {
+ ret = write_contents_to_common_pem_file (dict, output_count);
+ if (!ret)
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", i);
+ ret = dict_get_str (dict, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get %s.",
+ output_name);
+ cli_out ("Unable to fetch output.");
+ }
+ if (output) {
+ cli_out ("%s", output);
+ output = NULL;
+ }
+ }
+
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_copy_file_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ "Copy unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ cli_out ("Successfully copied file.");
+
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- int ret = 0;
- gf1_cli_gsync_set_rsp rsp = {0, };
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
dict_t *dict = NULL;
char *gsync_status = NULL;
char *master = NULL;
char *slave = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t status_detail = _gf_false;
+
if (req->rpc_status == -1) {
ret = -1;
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_gsync_set_rsp);
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to get response structure");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -2958,8 +4392,17 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
if (ret)
goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_gsync (dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
if (rsp.op_ret) {
- cli_out ("%s", rsp.op_errstr ? rsp.op_errstr :
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
GEOREP" command unsuccessful");
ret = rsp.op_ret;
goto out;
@@ -2971,8 +4414,13 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
else
ret = 0;
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
- switch (rsp.type) {
+ switch (type) {
case GF_GSYNC_OPTION_TYPE_START:
case GF_GSYNC_OPTION_TYPE_STOP:
if (dict_get_str (dict, "master", &master) != 0)
@@ -2982,69 +4430,129 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
cli_out ("%s " GEOREP " session between %s & %s"
" has been successful",
- rsp.type == GF_GSYNC_OPTION_TYPE_START ?
+ type == GF_GSYNC_OPTION_TYPE_START ?
"Starting" : "Stopping",
master, slave);
break;
case GF_GSYNC_OPTION_TYPE_CONFIG:
- ret = gf_cli3_1_gsync_config_command (dict);
+ ret = gf_cli_gsync_config_command (dict);
break;
case GF_GSYNC_OPTION_TYPE_STATUS:
- ret = gf_cli3_1_gsync_out_status (dict);
- goto out;
+ status_detail = dict_get_str_boolean (dict,
+ "status-detail",
+ _gf_false);
+ ret = gf_cli_gsync_status_output (dict, status_detail);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out ("Deleting " GEOREP " session between %s & %s"
+ " has been successful", master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out ("Creating " GEOREP " session between %s & %s"
+ " has been successful", master, slave);
+ break;
+
default:
cli_out (GEOREP" command executed successfully");
}
out:
-
+ if (dict)
+ dict_unref (dict);
cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+
return ret;
}
int32_t
-gf_cli3_1_gsync_set (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_sys_exec (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
dict_t *dict = NULL;
- gf1_cli_gsync_set_req req;
+ gf_cli_req req = {{0,}};
if (!frame || !this || !data) {
ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid data");
goto out;
}
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ ret = cli_to_glusterd (&req, frame, gf_cli_sys_exec_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYS_EXEC, this, cli_rpc_prog,
+ NULL);
+out:
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+int32_t
+gf_cli_copy_file (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid data");
goto out;
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GSYNC_SET, NULL,
- this, gf_cli3_1_gsync_set_cbk,
- (xdrproc_t) xdr_gf1_cli_gsync_set_req);
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli_copy_file_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_COPY_FILE, this, cli_rpc_prog,
+ NULL);
out:
+ GF_FREE (req.dict.dict_val);
return ret;
}
-void*
-cli_profile_info_elem (void *a, int index)
+int32_t
+gf_cli_gsync_set (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- return ((cli_profile_info_t *)a) + index;
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_gsync_set_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GSYNC_SET, this, cli_rpc_prog,
+ NULL);
+
+out:
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
}
+
int
cli_profile_info_percentage_cmp (void *a, void *b)
{
@@ -3063,19 +4571,6 @@ cli_profile_info_percentage_cmp (void *a, void *b)
return ret;
}
-void
-cli_profile_info_swap (void *a, void *b)
-{
- cli_profile_info_t *ia = NULL;
- cli_profile_info_t *ib = NULL;
- cli_profile_info_t tmp = {0};
-
- ia = a;
- ib = b;
- tmp = *ia;
- *ia = *ib;
- *ib = tmp;
-}
void
cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
@@ -3085,7 +4580,6 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
uint64_t sec = 0;
uint64_t r_count = 0;
uint64_t w_count = 0;
- char *brick = NULL;
uint64_t rb_counts[32] = {0};
uint64_t wb_counts[32] = {0};
cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}};
@@ -3098,9 +4592,6 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
int ret = 0;
double total_percentage_latency = 0;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-brick", count);
- ret = dict_get_str (dict, key, &brick);
for (i = 0; i < 32; i++) {
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%d-%d-read-%d", count,
@@ -3135,7 +4626,7 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", count,
interval, i);
ret = dict_get_double (dict, key, &profile_info[i].max_latency);
- profile_info[i].fop_name = gf_fop_list[i];
+ profile_info[i].fop_name = (char *)gf_fop_list[i];
total_percentage_latency +=
(profile_info[i].fop_hits * profile_info[i].avg_latency);
@@ -3163,7 +4654,6 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
ret = dict_get_uint64 (dict, key, &w_count);
if (ret == 0) {
- cli_out ("Brick: %s", brick);
}
if (interval == -1)
@@ -3248,26 +4738,30 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
}
int32_t
-gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_stats_volume_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
dict_t *dict = NULL;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
char key[256] = {0};
int interval = 0;
int i = 1;
int32_t brick_count = 0;
char *volname = NULL;
+ char *brick = NULL;
+ char str[1024] = {0,};
+
if (-1 == req->rpc_status) {
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received resp to profile");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_stats_volume_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -3278,8 +4772,8 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- ret = dict_unserialize (rsp.stats_info.stats_info_val,
- rsp.stats_info.stats_info_len,
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
&dict);
if (ret) {
@@ -3287,7 +4781,17 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Unable to allocate memory");
goto out;
} else {
- dict->extra_stdfree = rsp.stats_info.stats_info_val;
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_profile (dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
}
ret = dict_get_str (dict, "volname", &volname);
@@ -3299,7 +4803,7 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
- cli_out ("%s", rsp.op_errstr);
+ cli_err ("%s", rsp.op_errstr);
} else {
switch (op) {
case GF_CLI_STATS_START:
@@ -3337,11 +4841,28 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
+ cli_err ("All bricks of volume %s are down.", volname);
goto out;
}
while (i <= brick_count) {
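+                /* Print a "Brick:" or "NFS Server :" heading, underlined with
+                 * hyphens, before the per-interval profile blocks. */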
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-brick", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Couldn't get brick name");
+ goto out;
+ }
+
+ ret = dict_get_str_boolean (dict, "nfs", _gf_false);
+ if (ret)
+ snprintf (str, sizeof (str), "NFS Server : %s", brick);
+ else
+ snprintf (str, sizeof (str), "Brick: %s", brick);
+ cli_out ("%s", str);
+ memset (str, '-', strlen (str));
+ cli_out ("%s", str);
+
snprintf (key, sizeof (key), "%d-cumulative", i);
ret = dict_get_int32 (dict, key, &interval);
if (ret == 0) {
@@ -3359,18 +4880,17 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
out:
if (dict)
dict_unref (dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.op_errstr);
cli_cmd_broadcast_response (ret);
return ret;
}
int32_t
-gf_cli3_1_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
- gf1_cli_stats_volume_req req = {0,};
- dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+ dict_t *dict = NULL;
GF_ASSERT (frame);
GF_ASSERT (this);
@@ -3379,67 +4899,65 @@ gf_cli3_1_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
if (!frame || !this || !data)
goto out;
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "op", (int32_t*)&req.op);
- if (ret)
- goto out;
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_profile_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_stats_volume_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_profile_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_stats_volume_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
- dict_t *dict = NULL;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
char key[256] = {0};
int i = 0;
int32_t brick_count = 0;
char brick[1024];
int32_t members = 0;
- char *filename;
- char *bricks;
- uint64_t value = 0;
+ char *filename;
+ char *bricks;
+ uint64_t value = 0;
int32_t j = 0;
gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
uint64_t nr_open = 0;
uint64_t max_nr_open = 0;
double throughput = 0;
double time = 0;
- long int time_sec = 0;
- long int time_usec = 0;
- struct tm *tm = NULL;
+ int32_t time_sec = 0;
+ long int time_usec = 0;
char timestr[256] = {0, };
- char *openfd_str = NULL;
+ char *openfd_str = NULL;
+ gf_boolean_t nfs = _gf_false;
+ gf_boolean_t clear_stats = _gf_false;
+ int stats_cleared = 0;
if (-1 == req->rpc_status) {
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received resp to top");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_stats_volume_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "Unable to decode response");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
if (rsp.op_ret) {
if (strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- cli_out ("volume top unsuccessful");
+ cli_err ("%s", rsp.op_errstr);
+ cli_err ("volume top unsuccessful");
ret = rsp.op_ret;
goto out;
}
@@ -3451,8 +4969,8 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- ret = dict_unserialize (rsp.stats_info.stats_info_val,
- rsp.stats_info.stats_info_len,
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
&dict);
if (ret) {
@@ -3467,6 +4985,18 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = 0;
goto out;
}
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_top (dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
+ goto out;
+ }
+
ret = dict_get_int32 (dict, "count", &brick_count);
if (ret)
goto out;
@@ -3474,13 +5004,35 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_get_int32 (dict, key, (int32_t*)&top_op);
if (ret)
goto out;
+
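+        /* A "top ... clear" request only reports, per brick, whether the
+         * stats were reset; the usual top output is skipped. */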
+ clear_stats = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+
while (i < brick_count) {
i++;
snprintf (brick, sizeof (brick), "%d-brick", i);
ret = dict_get_str (dict, brick, &bricks);
if (ret)
goto out;
- cli_out ("Brick: %s", bricks);
+
+ nfs = dict_get_str_boolean (dict, "nfs", _gf_false);
+
+ if (clear_stats) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-stats-cleared", i);
+ ret = dict_get_int32 (dict, key, &stats_cleared);
+ if (ret)
+ goto out;
+ cli_out (stats_cleared ? "Cleared stats for %s %s" :
+ "Failed to clear stats for %s %s",
+ nfs ? "NFS server on" : "brick", bricks);
+ continue;
+ }
+
+ if (nfs)
+ cli_out ("NFS Server : %s", bricks);
+ else
+ cli_out ("Brick: %s", bricks);
+
snprintf(key, sizeof (key), "%d-members", i);
ret = dict_get_int32 (dict, key, &members);
@@ -3525,7 +5077,14 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
if (!members) {
continue;
}
- cli_out ("MBps\t\tfilename\t\t time\n========================");
+ cli_out ("%*s %-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH, "MBps",
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH, "Filename",
+ VOL_TOP_PERF_TIME_WIDTH, "Time");
+ cli_out ("%*s %-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH, "====",
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH, "========",
+ VOL_TOP_PERF_TIME_WIDTH, "====");
break;
default:
goto out;
@@ -3550,14 +5109,27 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
if (ret)
goto out;
- tm = localtime (&time_sec);
- if (!tm)
- goto out;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ time_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS, time_usec);
-
- cli_out ("%"PRIu64"\t\t%s\t\t%s", value, filename, timestr);
+ if (strlen (filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
+ cli_out ("%*"PRIu64" %-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH,
+ value,
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH,
+ filename,
+ VOL_TOP_PERF_TIME_WIDTH,
+ timestr);
+ else
+ cli_out ("%*"PRIu64" ...%-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH,
+ value,
+                                 VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ filename + strlen (filename) -
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ VOL_TOP_PERF_TIME_WIDTH,
+ timestr);
} else {
cli_out ("%"PRIu64"\t\t%s", value, filename);
}
@@ -3571,16 +5143,15 @@ out:
if (dict)
dict_unref (dict);
- if (rsp.stats_info.stats_info_val)
- free (rsp.stats_info.stats_info_val);
+ free (rsp.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_top_volume (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_top_volume (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
- gf1_cli_stats_volume_req req = {0,};
+ gf_cli_req req = {{0,}};
dict_t *dict = NULL;
GF_ASSERT (frame);
@@ -3590,43 +5161,40 @@ gf_cli3_1_top_volume (call_frame_t *frame, xlator_t *this, void *data)
if (!frame || !this || !data)
goto out;
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "op", (int32_t*)&req.op);
- if (ret)
- goto out;
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict_req.dict_req_val,
- (size_t *)&req.dict_req.dict_req_len);
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_top_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_stats_volume_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_top_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int
-gf_cli3_1_getwd_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_getwd_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_getwd_rsp rsp = {0,};
- int ret = 0;
+ int ret = -1;
if (-1 == req->rpc_status) {
goto out;
}
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ cli_err ("getwd failed");
+ ret = rsp.op_ret;
goto out;
}
@@ -3642,7 +5210,7 @@ out:
}
int32_t
-gf_cli3_1_getwd (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_getwd (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf1_cli_getwd_req req = {0,};
@@ -3655,7 +5223,7 @@ gf_cli3_1_getwd (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_GETWD, NULL,
- this, gf_cli3_1_getwd_cbk,
+ this, gf_cli_getwd_cbk,
(xdrproc_t) xdr_gf1_cli_getwd_req);
out:
@@ -3664,39 +5232,1172 @@ out:
return ret;
}
-static int
-gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+void
+cli_print_volume_status_mempool (dict_t *dict, char *prefix)
{
- gf1_cli_status_volume_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *hostname = NULL;
- char *path = NULL;
- int i = 0;
- int port = 0;
- int online = 0;
- char key[1024] = {0,};
- int pid = 0;
- char brick[8192] = {0,};
- char *volname = NULL;
+ int ret = -1;
+ int32_t mempool_count = 0;
+ char *name = NULL;
+ int32_t hotcount = 0;
+ int32_t coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int32_t maxalloc = 0;
+ uint64_t pool_misses = 0;
+ int32_t maxstdalloc = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
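+        /* One row per mem-pool: the values come from the
+         * "<prefix>.pool<i>.*" keys filled in by the brick process. */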
+ memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "%s.mempool-count", prefix);
+ ret = dict_get_int32 (dict, key, &mempool_count);
+ if (ret)
+ goto out;
+
+ cli_out ("Mempool Stats\n-------------");
+ cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "Name", "HotCount",
+ "ColdCount", "PaddedSizeof", "AllocCount", "MaxAlloc",
+ "Misses", "Max-StdAlloc");
+ cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "----", "--------",
+ "---------", "------------", "----------",
+ "--------", "--------", "------------");
+
+ for (i = 0; i < mempool_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_str (dict, key, &name);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &hotcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &coldcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
+ prefix, i);
+ ret = dict_get_uint64 (dict, key, &paddedsizeof);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64 (dict, key, &alloccount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxalloc);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxstdalloc);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64 (dict, key, &pool_misses);
+ if (ret)
+ goto out;
+
+ cli_out ("%-30s %9d %9d %12"PRIu64" %10"PRIu64" %8d %8"PRIu64
+ " %12d", name, hotcount, coldcount, paddedsizeof,
+ alloccount, maxalloc, pool_misses, maxstdalloc);
+ }
+
+out:
+ return;
+
+}
+
+void
+cli_print_volume_status_mem (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ char key[1024] = {0,};
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int val = 0;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
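+        /* For each brick index (bricks plus other processes) print the
+         * mallinfo counters followed by the mem-pool statistics. */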
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Memory status for volume : %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ continue;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ continue;
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ cli_out ("Mallinfo\n--------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.arena", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d","Arena", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+                if (ret)
+ goto out;
+ cli_out ("%-8s : %d","Ordblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+                if (ret)
+ goto out;
+ cli_out ("%-8s : %d","Smblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+                if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Hblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Hblkhd", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Usmblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Fsmblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Uordblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Fordblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Keepcost", val);
+
+ cli_out (" ");
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d", i);
+ cli_print_volume_status_mempool (dict, key);
+ }
+out:
+ cli_out ("----------------------------------------------\n");
+ return;
+}
+
+void
+cli_print_volume_status_clients (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int client_count = 0;
+ char *clientname = NULL;
+ uint64_t bytesread = 0;
+ uint64_t byteswrite = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
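+        /* Per brick: list every connected client along with the bytes it has
+         * read from and written to that brick. */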
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Client connections for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.clientcount", i);
+ ret = dict_get_int32 (dict, key, &client_count);
+ if (ret)
+ goto out;
+
+ cli_out ("Clients connected : %d", client_count);
+ if (client_count == 0)
+ continue;
+
+ cli_out ("%-48s %15s %15s", "Hostname", "BytesRead",
+ "BytesWritten");
+ cli_out ("%-48s %15s %15s", "--------", "---------",
+ "------------");
+ for (j =0; j < client_count; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.client%d.hostname", i, j);
+ ret = dict_get_str (dict, key, &clientname);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.client%d.bytesread", i, j);
+ ret = dict_get_uint64 (dict, key, &bytesread);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.client%d.byteswrite", i, j);
+ ret = dict_get_uint64 (dict, key, &byteswrite);
+ if (ret)
+ goto out;
+
+ cli_out ("%-48s %15"PRIu64" %15"PRIu64,
+ clientname, bytesread, byteswrite);
+ }
+ }
+out:
+ cli_out ("----------------------------------------------\n");
+ return;
+}
+
+void
+cli_print_volume_status_inode_entry (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char inode_type;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
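+        /* Prints one inode table entry: GFID, lookup count, ref count and a
+         * single-letter ia_type code (R/D/L/B/C/F/S, 'I' for invalid). */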
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gfid", prefix);
+ ret = dict_get_str (dict, key, &gfid);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.nlookup", prefix);
+ ret = dict_get_uint64 (dict, key, &nlookup);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ref", prefix);
+ ret = dict_get_uint32 (dict, key, &ref);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ia_type", prefix);
+ ret = dict_get_int32 (dict, key, &ia_type);
+ if (ret)
+ goto out;
+
+ switch (ia_type) {
+ case IA_IFREG:
+ inode_type = 'R';
+ break;
+ case IA_IFDIR:
+ inode_type = 'D';
+ break;
+ case IA_IFLNK:
+ inode_type = 'L';
+ break;
+ case IA_IFBLK:
+ inode_type = 'B';
+ break;
+ case IA_IFCHR:
+ inode_type = 'C';
+ break;
+ case IA_IFIFO:
+ inode_type = 'F';
+ break;
+ case IA_IFSOCK:
+ inode_type = 'S';
+ break;
+ default:
+ inode_type = 'I';
+ break;
+ }
+
+ cli_out ("%-40s %14"PRIu64" %14"PRIu32" %9c",
+ gfid, nlookup, ref, inode_type);
+
+out:
+ return;
+
+}
+
+void
+cli_print_volume_status_itables (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+        int              i = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
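+        /* An inode table has three lists -- active, LRU and purge -- each
+         * printed under the same GFID/Lookups/Ref/IA-type header. */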
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.active_size", prefix);
+ ret = dict_get_uint32 (dict, key, &active_size);
+ if (ret)
+ goto out;
+ if (active_size != 0) {
+ cli_out ("Active inodes:");
+ cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
+ "IA type");
+ cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
+ "-------");
+ }
+ for (i = 0; i < active_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.active%d", prefix, i);
+ cli_print_volume_status_inode_entry (dict, key);
+ }
+ cli_out (" ");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru_size", prefix);
+ ret = dict_get_uint32 (dict, key, &lru_size);
+ if (ret)
+ goto out;
+ if (lru_size != 0) {
+ cli_out ("LRU inodes:");
+ cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
+ "IA type");
+ cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
+ "-------");
+ }
+ for (i = 0; i < lru_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
+ cli_print_volume_status_inode_entry (dict, key);
+ }
+ cli_out (" ");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge_size", prefix);
+ ret = dict_get_uint32 (dict, key, &purge_size);
+ if (ret)
+ goto out;
+ if (purge_size != 0) {
+ cli_out ("Purged inodes:");
+ cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
+ "IA type");
+ cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
+ "-------");
+ }
+ for (i = 0; i < purge_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
+ cli_print_volume_status_inode_entry (dict, key);
+ }
+
+out:
+ return;
+}
+
+void
+cli_print_volume_status_inode (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Inode tables for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+        for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conncount", i);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < conn_count; j++) {
+ if (conn_count > 1)
+ cli_out ("Connection %d:", j+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.itable",
+ i, j);
+ cli_print_volume_status_itables (dict, key);
+ cli_out (" ");
+ }
+ }
+out:
+ cli_out ("----------------------------------------------");
+ return;
+}
+
+void
+cli_print_volume_status_fdtable (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ int i = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
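+        /* Prints the fd table summary and then one line per open fd; slots
+         * with no "<prefix>.fdentry<i>.*" keys are skipped. */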
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &refcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.maxfds", prefix);
+ ret = dict_get_uint32 (dict, key, &maxfds);
+ if (ret)
+ goto out;
+
+        memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.firstfree", prefix);
+ ret = dict_get_int32 (dict, key, &firstfree);
+ if (ret)
+ goto out;
+
+ cli_out ("RefCount = %d MaxFDs = %d FirstFree = %d",
+ refcount, maxfds, firstfree);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.openfds", prefix);
+ ret = dict_get_int32 (dict, key, &openfds);
+ if (ret)
+ goto out;
+ if (0 == openfds) {
+ cli_err ("No open fds");
+ goto out;
+ }
+
+ cli_out ("%-19s %-19s %-19s %-19s", "FD Entry", "PID",
+ "RefCount", "Flags");
+ cli_out ("%-19s %-19s %-19s %-19s", "--------", "---",
+ "--------", "-----");
+
+ for (i = 0; i < maxfds ; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_pid);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
+ prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_refcount);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_flags);
+ if (ret)
+ continue;
+
+ cli_out ("%-19d %-19d %-19d %-19d", i, fd_pid, fd_refcount,
+ fd_flags);
+ }
+
+out:
+ return;
+}
+
+void
+cli_print_volume_status_fd (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("FD tables for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conncount", i);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < conn_count; j++) {
+ cli_out ("Connection %d:", j+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
+ i, j);
+ cli_print_volume_status_fdtable (dict, key);
+ cli_out (" ");
+ }
+ }
+out:
+ cli_out ("----------------------------------------------");
+ return;
+}
+
+void
+cli_print_volume_status_call_frame (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int ref_count = 0;
+ char *translator = 0;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+
+ if (!dict || !prefix)
+ return;
+
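+        /* A frame is printed as ref count, translator and completion state,
+         * plus parent and wind/unwind locations when the dict carries them. */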
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &ref_count);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.translator", prefix);
+ ret = dict_get_str (dict, key, &translator);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.complete", prefix);
+ ret = dict_get_int32 (dict, key, &complete);
+ if (ret)
+ return;
+
+ cli_out (" Ref Count = %d", ref_count);
+ cli_out (" Translator = %s", translator);
+ cli_out (" Completed = %s", (complete ? "Yes" : "No"));
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.parent", prefix);
+ ret = dict_get_str (dict, key, &parent);
+ if (!ret)
+ cli_out (" Parent = %s", parent);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windfrom", prefix);
+ ret = dict_get_str (dict, key, &wind_from);
+ if (!ret)
+ cli_out (" Wind From = %s", wind_from);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windto", prefix);
+ ret = dict_get_str (dict, key, &wind_to);
+ if (!ret)
+ cli_out (" Wind To = %s", wind_to);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
+ ret = dict_get_str (dict, key, &unwind_from);
+ if (!ret)
+ cli_out (" Unwind From = %s", unwind_from);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindto", prefix);
+ ret = dict_get_str (dict, key, &unwind_to);
+ if (!ret)
+ cli_out (" Unwind To = %s", unwind_to);
+}
+
+void
+cli_print_volume_status_call_stack (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ //char *op = NULL;
+ int count = 0;
+ int i = 0;
+
+ if (!dict || !prefix)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.uid", prefix);
+ ret = dict_get_int32 (dict, key, &uid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gid", prefix);
+ ret = dict_get_int32 (dict, key, &gid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", prefix);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unique", prefix);
+ ret = dict_get_uint64 (dict, key, &unique);
+ if (ret)
+ return;
+
+ /*
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.op", prefix);
+ ret = dict_get_str (dict, key, &op);
+ if (ret)
+ return;
+ */
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ return;
+
+ cli_out (" UID : %d", uid);
+ cli_out (" GID : %d", gid);
+ cli_out (" PID : %d", pid);
+ cli_out (" Unique : %"PRIu64, unique);
+ //cli_out ("\tOp : %s", op);
+ cli_out (" Frames : %d", count);
+
+ for (i = 0; i < count; i++) {
+ cli_out (" Frame %d", i+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
+ cli_print_volume_status_call_frame (dict, key);
+ }
+
+ cli_out (" ");
+}
+
+void
+cli_print_volume_status_callpool (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int call_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Pending calls for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.callpool.count", i);
+ ret = dict_get_int32 (dict, key, &call_count);
+ if (ret)
+ goto out;
+ cli_out ("Pending calls: %d", call_count);
+
+ if (0 == call_count)
+ continue;
+
+ for (j = 0; j < call_count; j++) {
+ cli_out ("Call Stack%d", j+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.callpool.stack%d", i, j);
+ cli_print_volume_status_call_stack (dict, key);
+ }
+ }
+
+out:
+ cli_out ("----------------------------------------------");
+ return;
+}
+
+static void
+cli_print_volume_status_tasks (dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int count = 0;
+ int task_count = 0;
+ int status = 0;
+ char *op = NULL;
+ char *task_id_str = NULL;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ char task[1024] = {0,};
+ char *brick = NULL;
+ char *src_brick = NULL;
+ char *dest_brick = NULL;
+
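+        /* Lists the active volume tasks; "Replace brick" and "Remove brick"
+         * get extra per-brick detail before the common status line. */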
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "tasks", &task_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get tasks count");
+ return;
+ }
+
+ cli_out ("Task Status of Volume %s", volname);
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+
+ if (task_count == 0) {
+ cli_out ("There are no active volume tasks");
+ cli_out (" ");
+ return;
+ }
+
+ for (i = 0; i < task_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &op);
+ if (ret)
+ return;
+ cli_out ("%-20s : %-20s", "Task", op);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (dict, key, &task_id_str);
+ if (ret)
+ return;
+ cli_out ("%-20s : %-20s", "ID", task_id_str);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ return;
+
+ snprintf (task, sizeof (task), "task%d", i);
+
+ /*
+ Replace brick only has two states - In progress and Complete
+ Ref: xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+ */
+
+ if (!strcmp (op, "Replace brick")) {
+ if (status)
+ status = GF_DEFRAG_STATUS_COMPLETE;
+ else
+ status = GF_DEFRAG_STATUS_STARTED;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.src-brick", task);
+ ret = dict_get_str (dict, key, &src_brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s : %-20s", "Source Brick", src_brick);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.dst-brick", task);
+ ret = dict_get_str (dict, key, &dest_brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s : %-20s", "Destination Brick",
+ dest_brick);
+
+ } else if (!strcmp (op, "Remove brick")) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", task);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s", "Removed bricks:");
+
+ for (j = 1; j <= count; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),"%s.brick%d",
+ task, j);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s", brick);
+ }
+ }
+ cli_out ("%-20s : %-20s", "Status",
+ cli_vol_task_status_str[status]);
+ cli_out (" ");
+ }
+
+out:
+ return;
+}
+
+static int
+gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int i = 0;
+ int pid = -1;
+ uint32_t cmd = 0;
+ gf_boolean_t notbrick = _gf_false;
+ char key[1024] = {0,};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ gf_cli_rsp rsp = {0,};
+ cli_volume_status_t status = {0};
+ cli_local_t *local = NULL;
+ gf_boolean_t wipe_local = _gf_false;
+ char msg[1024] = {0,};
if (req->rpc_status == -1)
goto out;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_status_volume_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "Volume status response error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received response to status cmd");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- else if (rsp.op_ret)
- cli_out ("Unable to obtain volume status information.");
+ local = ((call_frame_t *)myframe)->local;
+ if (!local) {
+ local = cli_local_get ();
+ if (!local) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get local");
+ goto out;
+ }
+ wipe_local = _gf_true;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "Unable to obtain volume "
+ "status information.");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all)
+ cli_xml_output_str ("volStatus", msg,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ ret = 0;
+ goto out;
+ }
+
+ cli_err ("%s", msg);
+ if (local && local->all) {
+ ret = 0;
+ cli_out (" ");
+ } else
+ ret = -1;
+
+ goto out;
+ }
dict = dict_new ();
if (!dict)
@@ -3708,92 +6409,335 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
if (ret)
goto out;
-
- ret = dict_get_int32 (dict, "count", &count);
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
if (ret)
goto out;
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ if (local && local->dict) {
+ dict_ref (dict);
+ ret = dict_set_static_ptr (local->dict, "rsp-dict", dict);
+ ret = 0;
+ } else {
+ gf_log ("cli", GF_LOG_ERROR, "local not found");
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD))
+ notbrick = _gf_true;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_begin (local,
+ rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+ if (cmd & GF_CLI_STATUS_TASKS) {
+ ret = cli_xml_output_vol_status_tasks_detail (local,
+ dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,"Error outputting "
+ "to xml");
+ goto out;
+ }
+ } else {
+ ret = cli_xml_output_vol_status (local, dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_end (local);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
+ }
+ goto out;
+ }
+
+ status.brick = GF_CALLOC (1, PATH_MAX + 256, gf_common_mt_strdup);
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ cli_print_volume_status_mem (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CLIENTS:
+ cli_print_volume_status_clients (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_INODE:
+ cli_print_volume_status_inode (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_FD:
+ cli_print_volume_status_fd (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CALLPOOL:
+ cli_print_volume_status_callpool (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_TASKS:
+ cli_print_volume_status_tasks (dict);
+ goto cont;
+ break;
+ default:
+ break;
+ }
+
ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+
+ cli_out ("Status of volume: %s", volname);
+
+ if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
+ cli_out ("Gluster process\t\t\t\t\t\tPort\tOnline\tPid");
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+ }
+
+ for (i = 0; i <= index_max; i++) {
+
- cli_out ("Brick status for volume: %s", volname);
- cli_out ("Brick\t\t\t\t\t\t\tPort\tOnline\tPID");
- for (i = 0; i < count; i++) {
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.hostname", i);
ret = dict_get_str (dict, key, &hostname);
if (ret)
- goto out;
+ continue;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.path", i);
ret = dict_get_str (dict, key, &path);
if (ret)
- goto out;
+ continue;
+
+ /* Brick/not-brick is handled separately here as all
+ * types of nodes are contained in the default output
+ */
+ memset (status.brick, 0, PATH_MAX + 255);
+ if (!strcmp (hostname, "NFS Server") ||
+ !strcmp (hostname, "Self-heal Daemon"))
+ snprintf (status.brick, PATH_MAX + 255, "%s on %s",
+ hostname, path);
+ else
+ snprintf (status.brick, PATH_MAX + 255, "Brick %s:%s",
+ hostname, path);
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.port", i);
- ret = dict_get_int32 (dict, key, &port);
+ ret = dict_get_int32 (dict, key, &(status.port));
if (ret)
- goto out;
+ continue;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
+ ret = dict_get_int32 (dict, key, &(status.online));
if (ret)
- goto out;
+ continue;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.pid", i);
ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ continue;
+ if (pid == -1)
+ ret = gf_asprintf (&(status.pid_str), "%s", "N/A");
+ else
+ ret = gf_asprintf (&(status.pid_str), "%d", pid);
- snprintf (brick, sizeof (brick) -1, "%s:%s", hostname, path);
+ if (ret == -1)
+ goto out;
- cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
- cli_print_brick_status (brick, port, online, pid);
+ if ((cmd & GF_CLI_STATUS_DETAIL)) {
+ ret = cli_get_detail_status (dict, i, &status);
+ if (ret)
+ goto out;
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+ cli_print_detailed_status (&status);
+
+ } else {
+ cli_print_brick_status (&status);
+ }
}
+ cli_out (" ");
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)
+ cli_print_volume_status_tasks (dict);
+cont:
ret = rsp.op_ret;
- out:
+out:
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (status.brick);
+ if (local && wipe_local) {
+ cli_local_wipe (local);
+ }
+
cli_cmd_broadcast_response (ret);
return ret;
}
int32_t
-gf_cli3_1_status_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_status_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- gf1_cli_status_volume_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+ int ret = -1;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
+ if (!frame || !this || !data)
goto out;
- }
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
+ ret = cli_to_glusterd (&req, frame, gf_cli_status_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STATUS_VOLUME, this, cli_rpc_prog,
+ NULL);
+ out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
+ return ret;
+}
+
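+/* 'status all' is implemented as one request for the list of volumes
+ * followed by a per-volume status request. The callback of the first
+ * request stores its response dictionary in local->dict as "rsp-dict",
+ * from which the volume names are read here.
+ */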
+int
+gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int i = 0;
+ int ret = -1;
+ int vol_count = -1;
+ uint32_t cmd = 0;
+ char key[1024] = {0};
+ char *volname = NULL;
+ void *vol_dict = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (frame->local) {
+ local = frame->local;
+ local->all = _gf_true;
+ } else
+ goto out;
+
+ ret = dict_get_uint32 (local->dict, "cmd", &cmd);
if (ret)
goto out;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATUS_VOLUME, NULL,
- this, gf_cli3_1_status_cbk,
- (xdrproc_t)xdr_gf1_cli_status_volume_req);
+
+ ret = gf_cli_status_volume (frame, this, data);
+ if (ret)
+ goto out;
+
+ ret = dict_get_ptr (local->dict, "rsp-dict", &vol_dict);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 ((dict_t *)vol_dict, "vol_count", &vol_count);
+ if (ret) {
+ cli_err ("Failed to get names of volumes");
+ goto out;
+ }
+
+ /* remove the "all" flag in cmd */
+ cmd &= ~GF_CLI_STATUS_ALL;
+ cmd |= GF_CLI_STATUS_VOL;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ //TODO: Pass proper op_* values
+ ret = cli_xml_output_vol_status_begin (local, 0, 0, NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (vol_count == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err ("No volumes present");
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; i < vol_count; i++) {
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "vol%d", i);
+ ret = dict_get_str (vol_dict, key, &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_uint32 (dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ ret = gf_cli_status_volume (frame, this, dict);
+ if (ret)
+ goto out;
+
+ dict_unref (dict);
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_status_end (local);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR, "status all failed");
+
+ if (vol_dict)
+ dict_unref (vol_dict);
+
+ if (ret && dict)
+ dict_unref (dict);
+
+ if (local)
+ cli_local_wipe (local);
+
+ if (frame)
+ frame->local = NULL;
+
return ret;
}
static int
-gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_mount_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_mount_rsp rsp = {0,};
- int ret = 0;
+ int ret = -1;
if (-1 == req->rpc_status) {
goto out;
@@ -3801,7 +6745,8 @@ gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -3812,9 +6757,9 @@ gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
cli_out ("%s", rsp.path);
} else {
/* weird sounding but easy to parse... */
- cli_out ("%d : failed with this errno (%s)",
+ cli_err ("%d : failed with this errno (%s)",
rsp.op_errno, strerror (rsp.op_errno));
- ret = 1;
+ ret = -1;
}
out:
@@ -3823,7 +6768,7 @@ out:
}
int32_t
-gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_mount (call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_mount_req req = {0,};
int ret = -1;
@@ -3839,7 +6784,7 @@ gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
req.label = label;
ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ &req.dict.dict_len);
if (ret) {
ret = -1;
goto out;
@@ -3847,7 +6792,7 @@ gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_MOUNT, NULL,
- this, gf_cli3_1_mount_cbk,
+ this, gf_cli_mount_cbk,
(xdrproc_t)xdr_gf1_cli_mount_req);
out:
@@ -3856,11 +6801,11 @@ out:
}
static int
-gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_umount_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_umount_rsp rsp = {0,};
- int ret = 0;
+ int ret = -1;
if (-1 == req->rpc_status) {
goto out;
@@ -3868,7 +6813,8 @@ gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -3877,8 +6823,8 @@ gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
if (rsp.op_ret == 0)
ret = 0;
else {
- cli_out ("umount failed");
- ret = 1;
+ cli_err ("umount failed");
+ ret = -1;
}
out:
@@ -3887,7 +6833,7 @@ out:
}
int32_t
-gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_umount (call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_umount_req req = {0,};
int ret = -1;
@@ -3909,7 +6855,7 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_UMOUNT, NULL,
- this, gf_cli3_1_umount_cbk,
+ this, gf_cli_umount_cbk,
(xdrproc_t)xdr_gf1_cli_umount_req);
out:
@@ -3917,110 +6863,453 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
return ret;
}
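+/* Print the self-heal crawl statistics gathered for one brick: for every
+ * recorded crawl, its type, start/end time and the number of healed,
+ * split-brain and heal-failed entries.
+ */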
+void
+cmd_heal_volume_statistics_out (dict_t *dict, int brick)
+{
+
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ uint64_t i = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ cli_out ("------------------------------------------------");
+ cli_out ("\nCrawl statistics for brick no %d", brick);
+ cli_out ("Hostname of brick %s", hostname);
+
+ snprintf (key, sizeof key, "statistics-%d-count", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < num_entries; i++)
+ {
+ snprintf (key, sizeof key, "statistics_crawl_type-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &crawl_type);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof key, "statistics_healed_cnt-%d-%"PRIu64,
+ brick,i);
+ ret = dict_get_uint64 (dict, key, &healed_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof key, "statistics_sb_cnt-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_uint64 (dict, key, &split_brain_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_heal_failed_cnt-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_uint64 (dict, key, &heal_failed_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_strt_time-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &start_time_str);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_end_time-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &end_time_str);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_inprogress-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_int32 (dict, key, &progress);
+ if (ret)
+ goto out;
+
+ cli_out ("\nStarting time of crawl: %s", start_time_str);
+ if (progress == 1)
+ cli_out ("Crawl is in progress");
+ else
+ cli_out ("Ending time of crawl: %s", end_time_str);
+
+ cli_out ("Type of crawl: %s", crawl_type);
+ cli_out ("No. of entries healed: %"PRIu64,
+ healed_count);
+ cli_out ("No. of entries in split-brain: %"PRIu64,
+ split_brain_count);
+ cli_out ("No. of heal failed entries: %"PRIu64,
+ heal_failed_count);
+
+ }
+
+
+out:
+ return;
+}
+
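+/* Print the heal-info entries reported by one brick: brick host/path and
+ * status, followed by the list of entries needing heal, with timestamps
+ * when available.
+ */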
+void
+cmd_heal_volume_brick_out (dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ uint64_t i = 0;
+ uint32_t time = 0;
+ char timestr[32] = {0};
+ char *shd_status = NULL;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "%d-path", brick);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out ("\nBrick %s:%s", hostname, path);
+
+ snprintf (key, sizeof key, "%d-status", brick);
+ ret = dict_get_str (dict, key, &status);
+ if (status && strlen (status))
+ cli_out ("Status: %s", status);
+
+ snprintf (key, sizeof key, "%d-shd-status",brick);
+ ret = dict_get_str (dict, key, &shd_status);
+
+ if (!shd_status) {
+ snprintf (key, sizeof key, "%d-count", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+
+
+ for (i = 0; i < num_entries; i++) {
+ snprintf (key, sizeof key, "%d-%"PRIu64, brick, i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ continue;
+ time = 0;
+ snprintf (key, sizeof key, "%d-%"PRIu64"-time",
+ brick, i);
+ ret = dict_get_uint32 (dict, key, &time);
+ if (!time) {
+ cli_out ("%s", path);
+ } else {
+ gf_time_fmt (timestr, sizeof timestr,
+ time, gf_timefmt_FT);
+ if (i == 0) {
+ cli_out ("at path on brick");
+ cli_out ("-----------------------------------");
+ }
+ cli_out ("%s %s", timestr, path);
+ }
+ }
+ }
+
+out:
+ return;
+}
+
+
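+/* Print the number of entries pending heal on one brick, as returned in
+ * the "<brick>-hardlinks" key of the response dictionary.
+ */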
+void
+cmd_heal_volume_statistics_heal_count_out (dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ char *shd_status = NULL;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "%d-path", brick);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out ("\nBrick %s:%s", hostname, path);
+
+ snprintf (key, sizeof key, "%d-status", brick);
+ ret = dict_get_str (dict, key, &status);
+ if (status && strlen (status))
+ cli_out ("Status: %s", status);
+
+ snprintf (key, sizeof key, "%d-shd-status",brick);
+ ret = dict_get_str (dict, key, &shd_status);
+
+ if (!shd_status) {
+ snprintf (key, sizeof key, "%d-hardlinks", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ if (ret)
+ cli_out ("No gathered input for this brick");
+ else
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+
+
+ }
+
+out:
+ return;
+}
+
+
int
-gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_heal_vol_rsp rsp = {0,};
- int ret = 0;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
cli_local_t *local = NULL;
char *volname = NULL;
call_frame_t *frame = NULL;
+ dict_t *input_dict = NULL;
+ dict_t *dict = NULL;
+ int brick_count = 0;
+ int i = 0;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ char *operation = NULL;
+ char *substr = NULL;
+ char *heal_op_str = NULL;
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_heal_vol_rsp);
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
- if (frame) {
+ if (frame)
local = frame->local;
- frame->local = NULL;
- }
- if (local)
- volname = local->u.heal_vol.volname;
+ if (local) {
+ input_dict = local->dict;
+ ret = dict_get_int32 (input_dict, "heal-op",
+ (int32_t*)&heal_op);
+ }
+//TODO: Proper XML output
+//#if (HAVE_LIB_XML)
+// if (global_state->mode & GLUSTER_MODE_XML) {
+// ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
+// rsp.op_errno, rsp.op_errstr);
+// if (ret)
+// gf_log ("cli", GF_LOG_ERROR,
+// "Error outputting to xml");
+// goto out;
+// }
+//#endif
+
+ ret = dict_get_str (input_dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get volname");
+ goto out;
+ }
gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_out ("%s", rsp.op_errstr);
- else
- cli_out ("Starting heal on volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ switch (heal_op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ heal_op_str = "to perform index self heal";
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ heal_op_str = "to perform full self heal";
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ heal_op_str = "list of entries to be healed";
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ heal_op_str = "list of healed entries";
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ heal_op_str = "list of heal failed entries";
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ heal_op_str = "list of split brain entries";
+ break;
+ case GF_AFR_OP_STATISTICS:
+ heal_op_str = "crawl statistics";
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ heal_op_str = "count of entries to be healed";
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ heal_op_str = "count of entries to be healed per replica";
+ break;
+ case GF_AFR_OP_INVALID:
+ heal_op_str = "invalid heal op";
+ break;
+ }
+
+ if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
+ (heal_op == GF_AFR_OP_HEAL_INDEX)) {
+ operation = "Launching heal operation";
+ substr = "\nUse heal info commands to check status";
+ } else {
+ operation = "Gathering";
+ substr = "";
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, "")) {
+ cli_err ("%s", rsp.op_errstr);
+ } else {
+ cli_err ("%s %s on volume %s has been unsuccessful",
+ operation, heal_op_str, volname);
+ }
+
+ ret = rsp.op_ret;
+ goto out;
+ } else {
+ cli_out ("%s %s on volume %s has been successful %s", operation,
+ heal_op_str, volname, substr);
+ }
+
+ ret = rsp.op_ret;
+ if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
+ (heal_op == GF_AFR_OP_HEAL_INDEX))
+ goto out;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+
+ if (!brick_count) {
+ cli_err ("All bricks of volume %s are down.", volname);
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_AFR_OP_STATISTICS:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_out (dict, i);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_heal_count_out (dict,
+ i);
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ case GF_AFR_OP_HEALED_FILES:
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_brick_out (dict, i);
+ break;
+ default:
+ break;
+ }
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.volname)
- free (rsp.volname);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.op_errstr);
+ if (dict)
+ dict_unref (dict);
return ret;
}
int32_t
-gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_heal_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_heal_vol_req *req = NULL;
+ gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
+ dict_t *dict = NULL;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- req = data;
- local = cli_local_get ();
-
- if (local) {
- local->u.heal_vol.volname = req->volname;
- frame->local = local;
- }
+ dict = data;
- ret = cli_cmd_submit (req, frame, cli_rpc_prog,
- GLUSTER_CLI_HEAL_VOLUME, NULL,
- this, gf_cli3_1_heal_volume_cbk,
- (xdrproc_t) xdr_gf1_cli_heal_vol_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_heal_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_HEAL_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE (req.dict.dict_val);
+
return ret;
}
int32_t
-gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_statedump_vol_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
+ char msg[1024] = {0,};
if (-1 == req->rpc_status)
goto out;
ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf1_cli_statedump_vol_rsp);
+ (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- gf_log ("cli", GF_LOG_DEBUG, "Recieved response to statedump");
+ gf_log ("cli", GF_LOG_DEBUG, "Received response to statedump");
if (rsp.op_ret)
- cli_out ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof(msg), "%s", rsp.op_errstr);
else
- cli_out ("Volume statedump sucessful");
+ snprintf (msg, sizeof (msg), "Volume statedump successful");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volStatedump", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume statedump: failed: %s", msg);
+ else
+ cli_out ("volume statedump: success");
ret = rsp.op_ret;
out:
@@ -4029,14 +7318,202 @@ out:
}
int32_t
-gf_cli3_1_statedump_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_statedump_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_statedump_vol_req req = {0,};
+ gf_cli_req req = {{0,}};
dict_t *options = NULL;
+ int ret = -1;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ options = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_statedump_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_STATEDUMP_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
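+/* Callback for 'volume list': print each volume name from the response
+ * dictionary, or hand the dictionary to the XML writer in XML mode.
+ */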
+int32_t
+gf_cli_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0,};
+ dict_t *dict = NULL;
+ int vol_count = 0;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ int i = 0;
+
+ if (-1 == req->rpc_status)
+ goto out;
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_list (dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("%s", rsp.op_errstr);
+ else {
+ ret = dict_get_int32 (dict, "count", &vol_count);
+ if (ret)
+ goto out;
+
+ if (vol_count == 0) {
+ cli_err ("No volumes present in cluster");
+ goto out;
+ }
+ for (i = 0; i < vol_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+ cli_out ("%s", volname);
+ }
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_list_volume (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this)
+ goto out;
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_LIST_VOLUME, NULL,
+ this, gf_cli_list_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
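+/* Callback for 'volume clear-locks': on success print the lock summary
+ * ("lk-summary") returned by the bricks, otherwise the error message.
+ */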
+int32_t
+gf_cli_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char *lk_summary = NULL;
char *volname = NULL;
- char *option_str = NULL;
- int option_cnt = 0;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status)
+ goto out;
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+ gf_log ("cli", GF_LOG_DEBUG, "Received response to clear-locks");
+
+ if (rsp.op_ret) {
+ cli_err ("Volume clear-locks unsuccessful");
+ cli_err ("%s", rsp.op_errstr);
+
+ } else {
+ if (!rsp.dict.dict_len) {
+ cli_err ("Possibly no locks cleared");
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to serialize response dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get volname "
+ "from dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "lk-summary", &lk_summary);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get lock "
+ "summary from dictionary");
+ goto out;
+ }
+ cli_out ("Volume clear-locks successful");
+ cli_out ("%s", lk_summary);
+
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_clearlocks_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
int ret = -1;
if (!frame || !this || !data)
@@ -4044,75 +7521,1153 @@ gf_cli3_1_statedump_volume (call_frame_t *frame, xlator_t *this,
options = data;
- ret = dict_get_str (options, "volname", &volname);
- if (ret)
+ ret = cli_to_glusterd (&req, frame, gf_cli_clearlocks_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_CLRLOCKS_VOLUME, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
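+/* Report the outcome of 'snapshot delete': print the error string on
+ * failure, otherwise the name of the removed snap from the dictionary.
+ */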
+int32_t
+cli_snapshot_remove_reply (gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
+{
+ int32_t ret = -1;
+ char *snap_name = NULL;
+
+ GF_ASSERT (rsp);
+ GF_ASSERT (dict);
+ GF_ASSERT (frame);
+
+ if (rsp->op_ret) {
+ cli_err("snapshot delete: failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
goto out;
- req.volname = volname;
+ }
- ret = dict_get_str (options, "options", &option_str);
- if (ret)
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
+ }
+
+ cli_out ("snapshot delete: %s: snap removed successfully",
+ snap_name);
+ ret = 0;
+
+out:
+ return ret;
+}
+
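+/* Display the result of 'snapshot config': confirmation of the hard/soft
+ * limits that were set, or, for a display request, the system-wide and
+ * per-volume snapshot limits.
+ */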
+int
+cli_snapshot_config_display (dict_t *dict, gf_cli_rsp *rsp)
+{
+ char buf[PATH_MAX] = "";
+ char *volname = NULL;
+ int ret = -1;
+ int config_command = 0;
+ uint64_t value = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ uint64_t i = 0;
+ uint64_t voldisplaycount = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp);
+
+ if (rsp->op_ret) {
+ cli_err ("Snapshot Config : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "config-command", &config_command);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ /* Ignore the error, as volname is optional */
+
+ if (!volname) {
+ volname = "System";
+ }
+
+ ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit);
+ /* Ignore the error, as the key specified is optional */
+ ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit);
+
+ if (!hard_limit && !soft_limit
+ && config_command != GF_SNAP_CONFIG_DISPLAY) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Could not fetch config-key");
+ goto out;
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (hard_limit && soft_limit) {
+ cli_out ("snapshot config: snap-max-hard-limit "
+ "& snap-max-soft-limit for system set "
+ "successfully");
+ } else if (hard_limit){
+ cli_out ("snapshot config: %s "
+ "for snap-max-hard-limit set successfully",
+ volname);
+ } else if (soft_limit) {
+ cli_out ("snapshot config: %s "
+ "for snap-max-soft-limit set successfully",
+ volname);
+ }
+ break;
+
+ case GF_SNAP_CONFIG_DISPLAY :
+ cli_out ("\nSnapshot System Configuration:");
+ ret = dict_get_uint64 (dict, "snap-max-hard-limit",
+ &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "snap_max_hard_limit for %s", volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("snap-max-hard-limit : %"PRIu64, value);
+
+ ret = dict_get_uint64 (dict, "snap-max-soft-limit",
+ &soft_limit);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "snap-max-soft-limit for %s", volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("snap-max-soft-limit : %"PRIu64"%%\n",
+ soft_limit);
+
+ cli_out ("Snapshot Volume Configuration:");
+
+ ret = dict_get_uint64 (dict, "voldisplaycount",
+ &voldisplaycount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not fetch voldisplaycount");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < voldisplaycount; i++) {
+ snprintf (buf, sizeof(buf), "volume%ld-volname", i);
+ ret = dict_get_str (dict, buf, &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ " %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("\nVolume : %s", volname);
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-hard-limit", i);
+ ret = dict_get_uint64 (dict, buf, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ " %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("snap-max-hard-limit : %"PRIu64, value);
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-active-hard-limit", i);
+ ret = dict_get_uint64 (dict, buf, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch"
+ " effective snap_max_hard_limit for "
+ "%s", volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("Effective snap-max-hard-limit : %"PRIu64,
+ value);
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-soft-limit", i);
+ ret = dict_get_uint64 (dict, buf, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ " %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("Effective snap-max-soft-limit : %"PRIu64" "
+ "(%"PRIu64"%%)", value, soft_limit);
+ }
+ break;
+ default :
+ break;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function is used to print the volume related information
+ * of a snap.
+ *
+ * arg - 0, dict : Response Dictionary.
+ * arg - 1, prefix str : snaplist.snap{0..}.vol{0..}.*
+ */
+int
+cli_get_each_volinfo_in_snap (dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven) {
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int value = 0;
+ int ret = -1;
+ char indent[5] = "\t";
+ char *volname = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ if (snap_driven) {
+ ret = snprintf (key, sizeof (key), "%s.volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Snap Volume Name", ":", get_buffer);
+
+ ret = snprintf (key, sizeof (key),
+ "%s.origin-volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ cli_out ("%-12s", "Origin:");
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Origin Volume name", ":", volname);
+
+
+ ret = snprintf (key, sizeof (key), "%s.snapcount",
+ keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s%s %s %s %d", indent, "Snaps taken for",
+ volname, ":", value);
+
+ ret = snprintf (key, sizeof (key), "%s.snaps-available",
+ keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s%s %s %s %d", indent, "Snaps available for",
+ volname, ":", value);
+ }
+
+
+ ret = snprintf (key, sizeof (key), "%s.vol-status", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Status",
+ ":", get_buffer);
+out :
+ return ret;
+}
+
+/* This function is used to print snap related information
+ * arg - 0, dict : Response dictionary.
+ * arg - 1, prefix_str : snaplist.snap{0..}.*
+ */
+int
+cli_get_volinfo_in_snap (dict_t *dict, char *keyprefix) {
+
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ int ret = -1;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ ret = snprintf (key, sizeof (key), "%s.vol-count", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &volcount);
+ for (i = 1 ; i <= volcount ; i++) {
+ ret = snprintf (key, sizeof (key),
+ "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap (dict, key, _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not list "
+ "details of volume in a snap");
+ goto out;
+ }
+ cli_out (" ");
+ }
+
+out :
+ return ret;
+}
+
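+/* Print the details of one snapshot (name, UUID, optional description and
+ * creation time); in snap-driven mode the constituent snap volumes are
+ * listed as well.
+ */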
+int
+cli_get_each_snap_info (dict_t *dict, char *prefix_str,
+ gf_boolean_t snap_driven) {
+ char key_buffer[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int ret = -1;
+ char indent[5] = "";
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix_str);
+
+ if (!snap_driven)
+ strcat (indent, "\t");
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snapname",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ",
+ key_buffer);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Snapshot",
+ ":", get_buffer);
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-id",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ",
+ key_buffer);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Snap UUID",
+ ":", get_buffer);
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-desc",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (!ret) {
+ /* Ignore error for description */
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Description", ":", get_buffer);
+ }
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-time",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ",
+ prefix_str);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Created",
+ ":", get_buffer);
+
+ if (snap_driven) {
+ cli_out ("%-12s", "Snap Volumes:\n");
+ ret = cli_get_volinfo_in_snap (dict, prefix_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to list details "
+ "of the snaps");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* This is a generic function to print snap related information.
+ * arg - 0, dict : Response Dictionary
+ */
+int
+cli_call_snapshot_info (dict_t *dict, gf_boolean_t bool_snap_driven) {
+ int snap_count = 0;
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "snap-count", &snap_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snap-count");
+ goto out;
+ }
+
+ if (snap_count == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 1 ; i <= snap_count ; i++) {
+ ret = snprintf (key, sizeof (key), "snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_snap_info (dict, key, bool_snap_driven);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to print snap details");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
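+/* Print the snapshot information of a volume: the origin volume name, the
+ * number of snaps taken and still available, and the details of each snap.
+ */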
+int
+cli_get_snaps_in_volume (dict_t *dict) {
+ int ret = -1;
+ int i = 0;
+ int count = 0;
+ int avail = 0;
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "origin-volname", &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch origin-volname");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%s", "Volume Name", ":", get_buffer);
+
+ ret = dict_get_int32 (dict, "snap-count", &avail);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snap-count");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%d", "Snaps Taken", ":", avail);
+
+ ret = dict_get_int32 (dict, "snaps-available", &count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snaps-available");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%d", "Snaps Available", ":", count);
+
+ for (i = 1 ; i <= avail ; i++) {
+ snprintf (key, sizeof (key), "snap%d", i);
+ ret = cli_get_each_snap_info (dict, key, _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to print snap details");
+ goto out;
+ }
+
+ ret = snprintf (key, sizeof (key), "snap%d.vol1", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap (dict, key, _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get volume "
+ "related information");
+ goto out;
+ }
+
+ cli_out (" ");
+ }
+out :
+ return ret;
+}
+
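+/* Print the snapshot names returned for 'snapshot list'. */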
+int
+cli_snapshot_list (dict_t *dict) {
+ int snapcount = 0;
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int i = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "snap-count", &snapcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snap count");
goto out;
- req.options = option_str;
+ }
+
+ if (snapcount == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 1 ; i <= snapcount ; i++) {
+ ret = snprintf (key, sizeof (key), "snapname%d",i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ cli_out ("%s", get_buffer);
+ }
+ }
+out :
+ return ret;
+}
+
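+/* Print the status of every brick of a snap volume: brick path, volume
+ * group, whether the brick is running, its PID, data usage and LV size.
+ */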
+int
+cli_get_snap_volume_status (dict_t *dict, char *key_prefix)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int brickcount = 0;
+ int i = 0;
+ int pid = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (key_prefix);
+
+ ret = snprintf (key, sizeof (key), "%s.brickcount", key_prefix);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_int32 (dict, key, &brickcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+ goto out;
+ }
+
+ for ( i = 0 ; i < brickcount ; i++ ) {
+ ret = snprintf (key, sizeof (key), "%s.brick%d.path",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Brick Path");
+ continue;
+ }
+ cli_out ("\n\t%-17s %s %s", "Brick Path", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.vgname",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Volume Group");
+ cli_out ("\t%-17s %s %s", "Volume Group", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "Volume Group", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.status",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Brick Running");
+ cli_out ("\t%-17s %s %s", "Brick Running", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "Brick Running", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get pid");
+ cli_out ("\t%-17s %s %s", "Brick PID", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %d", "Brick PID", ":", pid);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.data",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Data Percent");
+ cli_out ("\t%-17s %s %s", "Data Percentage", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "Data Percentage", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.lvsize",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO, "Unable to get LV Size");
+ cli_out ("\t%-17s %s %s", "LV Size", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "LV Size", ":", buffer);
+
+ }
+out :
+ return ret;
+}
+
+
+
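+/* Print the status of a single snapshot: snap name, UUID and the brick
+ * status of each of its snap volumes.
+ */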
+int
+cli_get_single_snap_status (dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+ cli_out ("\nSnap Name : %s", get_buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+ cli_out ("Snap UUID : %s", get_buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &volcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+
+ for (i = 0 ; i < volcount ; i++) {
+ ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = cli_get_snap_volume_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not get snap volume status");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
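+/* Print the status of every snapshot present in the response dictionary. */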
+int
+cli_snap_status_all (dict_t *dict) {
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ int snapcount = 0;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "status.snapcount", &snapcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 0 ; i < snapcount; i++) {
+ ret = snprintf (key, sizeof (key), "status.snap%d",i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_single_snap_status (dict, key);
+ }
+out:
+ return ret;
+}
+
+
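+/* Dispatch 'snapshot status' output depending on whether all snaps, a
+ * single snap, or the snaps of one volume were requested.
+ */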
+int
+cli_snapshot_status_display (dict_t *dict, gf_cli_rsp *rsp)
+{
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int status_cmd = -1;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp);
+
+ if (rsp->op_ret) {
+ cli_err ("Snapshot Status : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "cmd", &status_cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+ switch (status_cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL :
+ {
+ ret = cli_snap_status_all (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of all snap");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_STATUS_TYPE_SNAP :
+ {
+ ret = snprintf (key, sizeof (key), "status.snap0");
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_single_snap_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_STATUS_TYPE_VOL :
+ {
+ ret = cli_snap_status_all (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap in a volume");
+ goto out;
+ }
+ break;
+ }
+ default :
+ break;
+ }
+out :
+ return ret;
+}
+
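+/* Common callback for the snapshot sub-commands: unserialize the response
+ * dictionary and dispatch on its "type" key to the matching display
+ * routine.
+ */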
+int
+gf_cli_snapshot_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ char *snap_name = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t snap_driven = _gf_false;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- ret = dict_get_int32 (options, "option-cnt", &option_cnt);
if (ret)
goto out;
- req.option_cnt = option_cnt;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATEDUMP_VOLUME, NULL,
- this, gf_cli3_1_statedump_volume_cbk,
- (xdrproc_t)xdr_gf1_cli_statedump_vol_req);
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot create: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+ cli_out ("snapshot create: %s: snap created successfully",
+ snap_name);
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ /* TODO: Check if rsp.op_ret needs to be checked here. Or is
+ * it ok to check this in the start of the function where we
+ * get rsp.*/
+ if (rsp.op_ret) {
+ cli_err("snapshot restore: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+
+ cli_out ("Snapshot restore: %s: Snap restored "
+ "successfully", snap_name);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_INFO:
+ if (rsp.op_ret) {
+ cli_err ("Snapshot info : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ snap_driven = dict_get_str_boolean (dict, "snap-driven",
+ _gf_false);
+ if (snap_driven == _gf_true) {
+ ret = cli_call_snapshot_info (dict, snap_driven);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Snapshot info failed");
+ goto out;
+ }
+ } else if (snap_driven == _gf_false) {
+ ret = cli_get_snaps_in_volume (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Snapshot info failed");
+ goto out;
+ }
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_snapshot_config_display (dict, &rsp);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot config output.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ if (rsp.op_ret) {
+ cli_err ("Snapshot list : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = cli_snapshot_list (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot list");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = cli_snapshot_remove_reply (&rsp, dict, frame);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to delete snap");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_snapshot_status_display (dict, &rsp);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot status output.");
+ goto out;
+ }
+ break;
+
+ default:
+ cli_err ("Unknown command executed");
+ ret = -1;
+ goto out;
+ }
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+
+ return ret;
+}
+
+int32_t
+gf_cli_snapshot (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ options = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog,
+ NULL);
out:
- if (options)
- dict_destroy (options);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
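+/* Common helper for submitting a CLI request to glusterd: rebuild the
+ * original command line from local->words into "cmd-str", serialize the
+ * dictionary into the request and submit it with the given callback.
+ */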
+int
+cli_to_glusterd (gf_cli_req *req, call_frame_t *frame,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc, dict_t *dict,
+ int procnum, xlator_t *this, rpc_clnt_prog_t *prog,
+ struct iobref *iobref)
+{
+ int ret = 0;
+ size_t len = 0;
+ char *cmd = NULL;
+ int i = 0;
+ const char **words = NULL;
+ cli_local_t *local = NULL;
+
+ if (!this || !frame || !dict) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!frame->local) {
+ ret = -1;
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (!local->words) {
+ ret = -1;
+ goto out;
+ }
+
+ words = local->words;
+
+ while (words[i])
+ len += strlen (words[i++]) + 1;
+
+ cmd = GF_CALLOC (1, len, gf_common_mt_char);
+
+ if (!cmd) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; words[i]; i++) {
+ strncat (cmd, words[i], strlen (words[i]));
+ if (words[i+1] != NULL)
+ strncat (cmd, " ", strlen (" "));
+ }
+
+ cmd [len - 1] = '\0';
+
+ ret = dict_set_dynstr (dict, "cmd-str", cmd);
+ if (ret)
+ goto out;
+
+ ret = dict_allocate_and_serialize (dict, &(req->dict).dict_val,
+ &(req->dict).dict_len);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = cli_cmd_submit (req, frame, prog, procnum, iobref, this,
+ cbkfn, (xdrproc_t) xdrproc);
+
+out:
return ret;
+
}
struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_NULL] = {"NULL", NULL },
- [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe},
- [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli3_1_deprobe},
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli3_1_list_friends},
- [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli3_1_create_volume},
- [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli3_1_delete_volume},
- [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli3_1_start_volume},
- [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli3_1_stop_volume},
- [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli3_1_rename_volume},
- [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli3_1_defrag_volume},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli3_1_get_volume},
- [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli3_1_get_next_volume},
- [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli3_1_set_volume},
- [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli3_1_add_brick},
- [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli3_1_remove_brick},
- [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli3_1_replace_brick},
- [GLUSTER_CLI_LOG_FILENAME] = {"LOG FILENAME", gf_cli3_1_log_filename},
- [GLUSTER_CLI_LOG_LOCATE] = {"LOG LOCATE", gf_cli3_1_log_locate},
- [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli3_1_log_rotate},
- [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli3_1_getspec},
- [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli3_1_pmap_b2p},
- [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli3_1_sync_volume},
- [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli3_1_reset_volume},
- [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli3_1_fsm_log},
- [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli3_1_gsync_set},
- [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli3_1_profile_volume},
- [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli3_1_quota},
- [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli3_1_top_volume},
- [GLUSTER_CLI_LOG_LEVEL] = {"VOLUME_LOGLEVEL", gf_cli3_1_log_level},
- [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount},
- [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume},
- [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli3_1_statedump_volume},
+ [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli_probe},
+ [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli_deprobe},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli_list_friends},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", gf_cli3_1_uuid_reset},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", gf_cli3_1_uuid_get},
+ [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli_create_volume},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli_delete_volume},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli_start_volume},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli_stop_volume},
+ [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli_rename_volume},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli_defrag_volume},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli_get_volume},
+ [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli_get_next_volume},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli_set_volume},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli_add_brick},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli_remove_brick},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli_replace_brick},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli_log_rotate},
+ [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli_getspec},
+ [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli_pmap_b2p},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli_sync_volume},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli_reset_volume},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli_fsm_log},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli_gsync_set},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli_profile_volume},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli_quota},
+ [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli_top_volume},
+ [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli_getwd},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli_status_volume},
+ [GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli_mount},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli_umount},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli_heal_volume},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli_statedump_volume},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli_list_volume},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", gf_cli_clearlocks_volume},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", gf_cli_copy_file},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec},
+ [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot},
};
struct rpc_clnt_program cli_prog = {
.progname = "Gluster CLI",
.prognum = GLUSTER_CLI_PROGRAM,
.progver = GLUSTER_CLI_VERSION,
- .numproc = GLUSTER_CLI_PROCCNT,
+ .numproc = GLUSTER_CLI_MAXVALUE,
.proctable = gluster_cli_actors,
};
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
new file mode 100644
index 000000000..d8884d44b
--- /dev/null
+++ b/cli/src/cli-xml-output.c
@@ -0,0 +1,3772 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdlib.h>
+#include "cli.h"
+#include "cli1-xdr.h"
+#include "run.h"
+#include "compat.h"
+#include "syscall.h"
+
+
+enum gf_task_types {
+ GF_TASK_TYPE_REBALANCE,
+ GF_TASK_TYPE_REMOVE_BRICK
+};
+
+/*
+ * IMPORTANT NOTE:
+ * All exported functions in this file which use libxml need to use a
+ * #if (HAVE_LIB_XML), #else, #endif
+ * For eg,
+ * int exported_func () {
+ * #if (HAVE_LIB_XML)
+ * <Stuff using libxml>
+ * #else
+ * return 0;
+ * #endif
+ * }
+ *
+ * All other functions, which are called internally within this file, need to be
+ * within #if (HAVE_LIB_XML), #endif statements
+ * For eg,
+ * #if (HAVE_LIB_XML)
+ * int internal_func ()
+ * {
+ * }
+ * #endif
+ *
+ * Following the above format ensures that all xml related code is compiled
+ * only when libxml2 is present, and also keeps the rest of the codebase free
+ * of #if (HAVE_LIB_XML)
+ */
+
+
+#if (HAVE_LIB_XML)
+
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+
+#define XML_RET_CHECK_AND_GOTO(ret, label) do { \
+ if (ret < 0) { \
+ ret = -1; \
+ goto label; \
+ } \
+ else \
+ ret = 0; \
+ } while (0)
+
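+/* Note: the libxml2 xmlTextWriter* calls used throughout this file return
+ * the number of bytes written on success and -1 on error; the macro above
+ * collapses any non-negative result to 0 and jumps to the given label on
+ * failure, so callers only ever see 0 or -1. */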
+int
+cli_begin_xml_output (xmlTextWriterPtr *writer, xmlDocPtr *doc)
+{
+ int ret = -1;
+
+ *writer = xmlNewTextWriterDoc (doc, 0);
+ if (*writer == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterStartDocument (*writer, "1.0", "UTF-8", "yes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <cliOutput> */
+ ret = xmlTextWriterStartElement (*writer, (xmlChar *)"cliOutput");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_end_xml_output (xmlTextWriterPtr writer, xmlDocPtr doc)
+{
+ int ret = -1;
+
+ /* </cliOutput> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterEndDocument (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc ("-", doc, "UTF-8", 1);
+
+ xmlFreeTextWriter (writer);
+ xmlFreeDoc (doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_common (xmlTextWriterPtr writer, int op_ret, int op_errno,
+ char *op_errstr)
+{
+ int ret = -1;
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opRet",
+ "%d", op_ret);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrno",
+ "%d", op_errno);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrstr",
+ "%s", op_errstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (op) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"cliOp",
+ "%s", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (str) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"output",
+ "%s", str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
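+/* Illustrative sketch only (hypothetical argument values): a call such as
+ * cli_xml_output_str ("volStart", "Starting volume has been successful", 0, 0, "")
+ * would print a document roughly of the form
+ * <cliOutput>
+ * <opRet>0</opRet>
+ * <opErrno>0</opErrno>
+ * <opErrstr/>
+ * <cliOp>volStart</cliOp>
+ * <output>Starting volume has been successful</output>
+ * </cliOutput>
+ * pretty-printed to stdout by cli_end_xml_output (). */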
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_data_pair (dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ int ret = -1;
+ xmlTextWriterPtr *writer = NULL;
+
+ writer = (xmlTextWriterPtr *)data;
+
+ ret = xmlTextWriterWriteFormatElement (*writer, (xmlChar *)key,
+ "%s", value->data);
+
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_dict (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <"op"> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (dict)
+ dict_foreach (dict, cli_xml_output_data_pair, &writer);
+
+ /* </"op"> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
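+/* Illustrative sketch (hypothetical op and dict contents): with op = "volRename"
+ * and a dict holding "volname" -> "test", cli_xml_output_dict () would nest
+ * <volRename><volname>test</volname></volRename>
+ * inside the common <cliOutput> wrapper, emitting one child element per
+ * dict member via cli_xml_output_data_pair (). */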
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int *online,
+ gf_boolean_t *node_present)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *path = NULL;
+ int port = 0;
+ int status = 0;
+ int pid = 0;
+ char key[1024] = {0,};
+
+ snprintf (key, sizeof (key), "brick%d.hostname", brick_index);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret) {
+ *node_present = _gf_false;
+ goto out;
+ }
+ *node_present = _gf_true;
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", brick_index);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", brick_index);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ *online = status;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.port", brick_index);
+ ret = dict_get_int32 (dict, key, &port);
+ if (ret)
+ goto out;
+
+ /* If the process is offline or doesn't provide a port (e.g. shd, the
+ * self-heal daemon), report the port as "N/A"; otherwise print the
+ * port number of the process.
+ */
+
+ if (*online == 1 && port != 0)
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"port",
+ "%d", port);
+ else
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"port",
+ "%s", "N/A");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.pid", brick_index);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
+ "%d", pid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
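+/* Illustrative fragment (hypothetical values) of what this helper emits for
+ * a single brick, inside the <node> element opened by its caller:
+ * <hostname>host1</hostname> <path>/bricks/b1</path> <status>1</status>
+ * <port>49152</port> <pid>1234</pid> */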
+
+int
+cli_xml_output_vol_status_detail (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ uint64_t size_total = 0;
+ uint64_t size_free = 0;
+ char *device = NULL;
+ uint64_t block_size = 0;
+ char *mnt_options = NULL;
+ char *fs_name = NULL;
+ char *inode_size = NULL;
+ uint64_t inodes_total = 0;
+ uint64_t inodes_free = 0;
+ char key[1024] = {0,};
+
+ snprintf (key, sizeof (key), "brick%d.total", brick_index);
+ ret = dict_get_uint64 (dict, key, &size_total);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeTotal",
+ "%"PRIu64, size_total);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free", brick_index);
+ ret = dict_get_uint64 (dict, key, &size_free);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeFree",
+ "%"PRIu64, size_free);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.device", brick_index);
+ ret = dict_get_str (dict, key, &device);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"device",
+ "%s", device);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.block_size", brick_index);
+ ret = dict_get_uint64 (dict, key, &block_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"blockSize",
+ "%"PRIu64, block_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mnt_options", brick_index);
+ ret = dict_get_str (dict, key, &mnt_options);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"mntOptions",
+ "%s", mnt_options);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.fs_name", brick_index);
+ ret = dict_get_str (dict, key, &fs_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsName",
+ "%s", fs_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* inode details are only available for ext 2/3/4 & xfs */
+ if (!IS_EXT_FS(fs_name) && strcmp (fs_name, "xfs")) {
+ ret = 0;
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.inode_size", brick_index);
+ ret = dict_get_str (dict, key, &inode_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodeSize",
+ "%s", inode_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.total_inodes", brick_index);
+ ret = dict_get_uint64 (dict, key, &inodes_total);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesTotal",
+ "%"PRIu64, inodes_total);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free_inodes", brick_index);
+ ret = dict_get_uint64 (dict, key, &inodes_free);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesFree",
+ "%"PRIu64, inodes_free);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_mempool (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int mempool_count = 0;
+ char *name = NULL;
+ int hotcount = 0;
+ int coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int maxalloc = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <mempool> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"mempool");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.mempool-count", prefix);
+ ret = dict_get_int32 (dict, key, &mempool_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%d", mempool_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < mempool_count; i++) {
+ /* <pool> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"pool");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_str (dict, key, &name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"name",
+ "%s", name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &hotcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"hotCount",
+ "%d", hotcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &coldcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"coldCount",
+ "%d", coldcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
+ prefix, i);
+ ret = dict_get_uint64 (dict, key, &paddedsizeof);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"paddedSizeOf", "%"PRIu64,
+ paddedsizeof);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64 (dict, key, &alloccount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"allocCount",
+ "%"PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxalloc);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"maxAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64 (dict, key, &alloccount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"poolMisses",
+ "%"PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxalloc);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"maxStdAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* </pool> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </mempool> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_mem (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int arena = 0;
+ int ordblks = 0;
+ int smblks = 0;
+ int hblks = 0;
+ int hblkhd = 0;
+ int usmblks = 0;
+ int fsmblks = 0;
+ int uordblks = 0;
+ int fordblks = 0;
+ int keepcost = 0;
+ char key[1024] = {0,};
+
+ /* <memStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"memStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <mallinfo> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"mallinfo");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.mallinfo.arena", brick_index);
+ ret = dict_get_int32 (dict, key, &arena);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"arena",
+ "%d", arena);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", brick_index);
+ ret = dict_get_int32 (dict, key, &ordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ordblks",
+ "%d", ordblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", brick_index);
+ ret = dict_get_int32 (dict, key, &smblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"smblks",
+ "%d", smblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", brick_index);
+ ret = dict_get_int32 (dict, key, &hblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblks",
+ "%d", hblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", brick_index);
+ ret = dict_get_int32 (dict, key, &hblkhd);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblkhd",
+ "%d", hblkhd);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", brick_index);
+ ret = dict_get_int32 (dict, key, &usmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"usmblks",
+ "%d", usmblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", brick_index);
+ ret = dict_get_int32 (dict, key, &fsmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsmblks",
+ "%d", fsmblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", brick_index);
+ ret = dict_get_int32 (dict, key, &uordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uordblks",
+ "%d", uordblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", brick_index);
+ ret = dict_get_int32 (dict, key, &fordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fordblks",
+ "%d", fordblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", brick_index);
+ ret = dict_get_int32 (dict, key, &keepcost);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"keepcost",
+ "%d", keepcost);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </mallinfo> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d", brick_index);
+ ret = cli_xml_output_vol_status_mempool (writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </memStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_clients (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int client_count = 0;
+ char *hostname = NULL;
+ uint64_t bytes_read = 0;
+ uint64_t bytes_write = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <clientsStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"clientsStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.clientcount", brick_index);
+ ret = dict_get_int32 (dict, key, &client_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"clientCount",
+ "%d", client_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < client_count; i++) {
+ /* <client> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"client");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.client%d.hostname",
+ brick_index, i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.client%d.bytesread",
+ brick_index, i);
+ ret = dict_get_uint64 (dict, key, &bytes_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"bytesRead",
+ "%"PRIu64, bytes_read);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.client%d.byteswrite",
+ brick_index, i);
+ ret = dict_get_uint64 (dict, key, &bytes_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"bytesWrite",
+ "%"PRIu64, bytes_write);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </client> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </clientsStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_inode_entry (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char key[1024] = {0,};
+
+ /* <inode> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"inode");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.gfid", prefix);
+ ret = dict_get_str (dict, key, &gfid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gfid",
+ "%s", gfid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.nlookup", prefix);
+ ret = dict_get_uint64 (dict, key, &nlookup);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nLookup",
+ "%"PRIu64, nlookup);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ref", prefix);
+ ret = dict_get_uint32 (dict, key, &ref);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ref",
+ "%"PRIu32, ref);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ia_type", prefix);
+ ret = dict_get_int32 (dict, key, &ia_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"iaType",
+ "%d", ia_type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </inode> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_itable (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ snprintf (key, sizeof (key), "%s.active_size", prefix);
+ ret = dict_get_uint32 (dict, key, &active_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"activeSize",
+ "%"PRIu32, active_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ if (active_size != 0) {
+ /* <active> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"active");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < active_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.active%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry
+ (writer, dict, key);
+ if (ret)
+ goto out;
+ }
+ /* </active> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru_size", prefix);
+ ret = dict_get_uint32 (dict, key, &lru_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"lruSize",
+ "%"PRIu32, lru_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ if (lru_size != 0) {
+ /* <lru> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"lru");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < lru_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry
+ (writer, dict, key);
+ if (ret)
+ goto out;
+ }
+ /* </lru> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge_size", prefix);
+ ret = dict_get_uint32 (dict, key, &purge_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"purgeSize",
+ "%"PRIu32, purge_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ if (purge_size != 0) {
+ /* <purge> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"purge");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < purge_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry
+ (writer, dict, key);
+ if (ret)
+ goto out;
+ }
+ /* </purge> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_inode (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <inodeStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"inodeStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.itable",
+ brick_index, i);
+ ret = cli_xml_output_vol_status_itable (writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </connection> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </inodeStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fdtable (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <fdTable> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdTable");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &refcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
+ "%d", refcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.maxfds", prefix);
+ ret = dict_get_uint32 (dict, key, &maxfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"maxFds",
+ "%"PRIu32, maxfds);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.firstfree", prefix);
+ ret = dict_get_int32 (dict, key, &firstfree);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"firstFree",
+ "%d", firstfree);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.openfds", prefix);
+ ret = dict_get_int32 (dict, key, &openfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"openFds",
+ "%d", openfds);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < maxfds; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_pid);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
+ prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_refcount);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_flags);
+ if (ret)
+ continue;
+
+ /* <fd> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fd");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"entry",
+ "%d", i+1);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"pid",
+ "%d", fd_pid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"refCount",
+ "%d", fd_refcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"flags",
+ "%d", fd_flags);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </fd> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </fdTable> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fd (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <fdStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
+ brick_index, i);
+ ret = cli_xml_output_vol_status_fdtable (writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </connection> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </fdStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callframe (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int ref_count = 0;
+ char *translator = NULL;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+ char key[1024] = {0,};
+
+ /* <callFrame> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"callFrame");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &ref_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
+ "%d", ref_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.translator", prefix);
+ ret = dict_get_str (dict, key, &translator);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"translator",
+ "%s", translator);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.complete", prefix);
+ ret = dict_get_int32 (dict, key, &complete);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"complete",
+ "%d", complete);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.parent", prefix);
+ ret = dict_get_str (dict, key, &parent);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"parent",
+ "%s", parent);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windfrom", prefix);
+ ret = dict_get_str (dict, key, &wind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"windFrom",
+ "%s", wind_from);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windto", prefix);
+ ret = dict_get_str (dict, key, &wind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"windTo",
+ "%s", wind_to);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
+ ret = dict_get_str (dict, key, &unwind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"unwindFrom",
+ "%s", unwind_from);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindto", prefix);
+ ret = dict_get_str (dict, key, &unwind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"unwindTo",
+ "%s", unwind_to);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </callFrame> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callstack (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ int frame_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <callStack> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"callStack");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.uid", prefix);
+ ret = dict_get_int32 (dict, key, &uid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uid",
+ "%d", uid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gid", prefix);
+ ret = dict_get_int32 (dict, key, &gid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gid",
+ "%d", gid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", prefix);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
+ "%d", pid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unique", prefix);
+ ret = dict_get_uint64 (dict, key, &unique);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"unique",
+ "%"PRIu64, unique);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &frame_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"frameCount",
+ "%d", frame_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < frame_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
+ ret = cli_xml_output_vol_status_callframe (writer, dict,
+ key);
+ if (ret)
+ goto out;
+ }
+
+ /* </callStack> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callpool (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int call_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <callpoolStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"callpoolStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.callpool.count", brick_index);
+ ret = dict_get_int32 (dict, key, &call_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%d", call_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < call_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.callpool.stack%d",
+ brick_index, i);
+ ret = cli_xml_output_vol_status_callstack (writer, dict,
+ key);
+ if (ret)
+ goto out;
+ }
+
+ /* </callpoolStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_status_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = cli_begin_xml_output (&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_common (local->writer, op_ret, op_errno,
+ op_errstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volStatus> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *) "volStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volumes> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_status_end (cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ /* </volumes> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volStatus> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (local->writer, local->doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
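+/* Sketch of the intended pairing (the actual callers are not part of this
+ * file): cli_xml_output_vol_status_begin () once, then
+ * cli_xml_output_vol_status () per volume, then
+ * cli_xml_output_vol_status_end (), yielding
+ * <volStatus><volumes><volume>...</volume>...</volumes></volStatus>
+ * inside the common <cliOutput> wrapper. */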
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_remove_brick_task_params (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int count = 0;
+ int i = 0;
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ goto out;
+
+ for (i = 1; i <= count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.brick%d", prefix, i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"brick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ brick = NULL;
+ }
+
+ /* </params> */
+ ret = xmlTextWriterEndElement (writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_replace_brick_task_params (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+
+ int ret = -1;
+ char key[1024] = {0,};
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.src-brick", prefix);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"srcBrick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.dst-brick", prefix);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"dstBrick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* </params> */
+ ret = xmlTextWriterEndElement (writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_tasks (cli_local_t *local, dict_t *dict)
+{
+ int ret = -1;
+ char *task_type = NULL;
+ char *task_id_str = NULL;
+ int status = 0;
+ int tasks = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <tasks> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"tasks");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "tasks", &tasks);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < tasks; i++) {
+ /* <task> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"task");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str (dict, key, &task_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"type",
+ "%s", task_type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (dict, key, &task_id_str);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!strcmp (task_type, "Replace brick")) {
+ if (status) {
+ status = GF_DEFRAG_STATUS_COMPLETE;
+ } else {
+ status = GF_DEFRAG_STATUS_STARTED;
+ }
+ }
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[status]);
+
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d", i);
+ if (!strcmp (task_type, "Replace brick")) {
+ ret = cli_xml_output_replace_brick_task_params
+ (local->writer, dict, key);
+ if (ret)
+ goto out;
+ } else if (!strcmp (task_type, "Remove brick")) {
+ ret = cli_xml_output_remove_brick_task_params
+ (local->writer, dict, key);
+ if (ret)
+ goto out;
+ }
+
+
+ /* </task> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </tasks> */
+ ret = xmlTextWriterEndElement (local->writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_tasks_detail (cli_local_t *local, dict_t *dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+
+ /*<volume>*/
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"volName", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_vol_status_tasks (local, dict);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_status (cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ char *volname = NULL;
+ int brick_count = 0;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ int online = 0;
+ gf_boolean_t node_present = _gf_true;
+ int i;
+
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"volName", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"nodeCount", "%d",
+ brick_count);
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ /* <node> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_vol_status_common (local->writer, dict, i,
+ &online, &node_present);
+ if (ret) {
+ if (node_present)
+ goto out;
+ else
+ continue;
+ }
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_DETAIL:
+ ret = cli_xml_output_vol_status_detail (local->writer,
+ dict, i);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_CLI_STATUS_MEM:
+ if (online) {
+ ret = cli_xml_output_vol_status_mem
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ if (online) {
+ ret = cli_xml_output_vol_status_clients
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ if (online) {
+ ret = cli_xml_output_vol_status_inode
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_FD:
+ if (online) {
+ ret = cli_xml_output_vol_status_fd
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ if (online) {
+ ret = cli_xml_output_vol_status_callpool
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+ default:
+ break;
+
+ }
+ /* </node> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* Tasks are only present when a normal volume status call is done on a
+ * single volume or on all volumes
+ */
+ if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) &&
+ (cmd & (GF_CLI_STATUS_VOL|GF_CLI_STATUS_ALL))) {
+ ret = cli_xml_output_vol_status_tasks (local, dict);
+ if (ret)
+ goto out;
+ }
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_top_rw_perf (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t throughput = 0;
+ long int time_sec = 0;
+ long int time_usec = 0;
+ char timestr[256] = {0,};
+ char key[1024] = {0,};
+
+ /* <file> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
+ member_index);
+ ret = dict_get_str (dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
+ "%s", filename);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64 (dict, key, &throughput);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%"PRIu64, throughput);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-time-sec-%d", brick_index,
+ member_index);
+ ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-time-usec-%d", brick_index,
+ member_index);
+ ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
+ if (ret)
+ goto out;
+
+ gf_time_fmt (timestr, sizeof timestr, time_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, time_usec);
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"time",
+ "%s", timestr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_top_other (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t count = 0;
+ char key[1024] = {0,};
+
+ /* <file> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
+ member_index);
+ ret = dict_get_str (dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
+ "%s", filename);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64 (dict, key, &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%"PRIu64, count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int brick_count = 0;
+ int top_op = GF_CLI_TOP_NONE;
+ char *brick_name = NULL;
+ int members = 0;
+ uint64_t current_open = 0;
+ uint64_t max_open = 0;
+ char *max_open_time = NULL;
+ double throughput = 0.0;
+ double time_taken = 0.0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volTop> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volTop");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
+ "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "1-top-op", &top_op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"topOp",
+ "%d", top_op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-brick", i);
+ ret = dict_get_str (dict, key, &brick_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"name",
+ "%s", brick_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-members", i);
+ ret = dict_get_int32 (dict, key, &members);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"members",
+ "%d", members);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-current-open", i);
+ ret = dict_get_uint64 (dict, key, &current_open);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"currentOpen", "%"PRIu64,
+ current_open);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-max-open", i);
+ ret = dict_get_uint64 (dict, key, &max_open);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"maxOpen", "%"PRIu64,
+ max_open);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-max-openfd-time", i);
+ ret = dict_get_str (dict, key, &max_open_time);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"maxOpenTime", "%s",
+ max_open_time);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
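+ /* no break: GF_CLI_TOP_OPEN falls through to the arm below, which
+ * adds no further per-brick fields for read/write/opendir/readdir */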
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+
+ break;
+
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-throughput", i);
+ ret = dict_get_double (dict, key, &throughput);
+ if (!ret) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-time", i);
+ ret = dict_get_double (dict, key, &time_taken);
+ }
+
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"throughput",
+ "%f", throughput);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"timeTaken",
+ "%f", time_taken);
+ }
+
+ break;
+
+ default:
+ ret = -1;
+ goto out;
+ }
+
+ for (j = 1; j <= members; j++) {
+ if (top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ ret = cli_xml_output_vol_top_rw_perf
+ (writer, dict, i, j);
+ } else {
+ ret = cli_xml_output_vol_top_other
+ (writer, dict, i, j);
+ }
+ if (ret)
+ goto out;
+ }
+
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </volTop> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_profile_stats (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int interval)
+{
+ int ret = -1;
+ uint64_t read_count = 0;
+ uint64_t write_count = 0;
+ uint64_t hits = 0;
+ double avg_latency = 0.0;
+ double max_latency = 0.0;
+ double min_latency = 0.0;
+ uint64_t duration = 0;
+ uint64_t total_read = 0;
+ uint64_t total_write = 0;
+ char key[1024] = {0};
+ int i = 0;
+
+ /* <cumulativeStats> || <intervalStats> */
+ if (interval == -1)
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"cumulativeStats");
+ else
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"intervalStats");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <blockStats> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"blockStats");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < 32; i++) {
+ /* <block> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"block");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"size", "%"PRIu32, (1 << i));
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-read-%d", brick_index,
+ interval, (1 << i));
+ ret = dict_get_uint64 (dict, key, &read_count);
+ if (ret)
+ read_count = 0;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"reads", "%"PRIu64, read_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-write-%d", brick_index,
+ interval, (1 << i));
+ ret = dict_get_uint64 (dict, key, &write_count);
+ if (ret)
+ write_count = 0;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"writes", "%"PRIu64, write_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </block> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </blockStats> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <fopStats> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fopStats");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-hits", brick_index,
+ interval, i);
+ ret = dict_get_uint64 (dict, key, &hits);
+ if (ret)
+ goto cont;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-avglatency", brick_index,
+ interval, i);
+ ret = dict_get_double (dict, key, &avg_latency);
+ if (ret)
+ goto cont;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-minlatency", brick_index,
+ interval, i);
+ ret = dict_get_double (dict, key, &min_latency);
+ if (ret)
+ goto cont;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", brick_index,
+ interval, i);
+ ret = dict_get_double (dict, key, &max_latency);
+ if (ret)
+ goto cont;
+
+ /* <fop> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fop");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"name","%s", gf_fop_list[i]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"hits", "%"PRIu64, hits);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"avgLatency", "%f", avg_latency);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"minLatency", "%f", min_latency);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"maxLatency", "%f", max_latency);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </fop> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+cont:
+ hits = 0;
+ avg_latency = 0.0;
+ min_latency = 0.0;
+ max_latency = 0.0;
+ }
+
+ /* </fopStats> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-duration", brick_index, interval);
+ ret = dict_get_uint64 (dict, key, &duration);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"duration",
+ "%"PRIu64, duration);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-total-read", brick_index, interval);
+ ret = dict_get_uint64 (dict, key, &total_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalRead",
+ "%"PRIu64, total_read);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-total-write", brick_index, interval);
+ ret = dict_get_uint64 (dict, key, &total_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalWrite",
+ "%"PRIu64, total_write);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </cumulativeStats> || </intervalStats> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ int op = GF_CLI_STATS_NONE;
+ int brick_count = 0;
+ char *brick_name = NULL;
+ int interval = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volProfile> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volProfile");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volname",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "op", &op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"profileOp",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (op != GF_CLI_STATS_INFO)
+ goto cont;
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
+ "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-brick", i);
+ ret = dict_get_str (dict, key, &brick_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"brickName", "%s", brick_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-cumulative", i);
+ ret = dict_get_int32 (dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats
+ (writer, dict, i, interval);
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-interval", i);
+ ret = dict_get_int32 (dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats
+ (writer, dict, i, interval);
+ if (ret)
+ goto out;
+ }
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+cont:
+ /* </volProfile> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ int i = 0;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volList> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volList");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%d", count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"volume",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </volList> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_info_option (xmlTextWriterPtr writer, char *substr,
+ char *optstr, char *valstr)
+{
+ int ret = -1;
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
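+ /* Advance ptr1/ptr2 in lock-step while substr and optstr match;
+ * ptr2 ends up at the part of optstr following the common prefix,
+ * i.e. the bare option name. If optstr ends right there, there is
+ * no name to print and the entry is skipped. */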
+ while (ptr1) {
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ if (!ptr1)
+ goto out;
+ if (!ptr2)
+ goto out;
+ }
+ if (*ptr2 == '\0')
+ goto out;
+
+ /* <option> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"option");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"name",
+ "%s", ptr2);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"value",
+ "%s", valstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </option> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+struct tmp_xml_option_logger {
+ char *key;
+ xmlTextWriterPtr writer;
+};
+
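+/* dict_foreach callback: for each key containing "option.", writes an
+ * <option> element using the key prefix and writer carried in the
+ * tmp_xml_option_logger passed as data. */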
+static int
+_output_vol_info_option (dict_t *d, char *k, data_t *v,
+ void *data)
+{
+ int ret = 0;
+ char *ptr = NULL;
+ struct tmp_xml_option_logger *tmp = NULL;
+
+ tmp = data;
+
+ ptr = strstr (k, "option.");
+ if (!ptr)
+ goto out;
+
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = cli_xml_output_vol_info_option (tmp->writer, tmp->key, k,
+ v->data);
+
+out:
+ return ret;
+}
+
+int
+cli_xml_output_vol_info_options (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char key[1024] = {0,};
+ struct tmp_xml_option_logger tmp = {0,};
+
+ snprintf (key, sizeof (key), "%s.opt_count", prefix);
+ ret = dict_get_int32 (dict, key, &opt_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"optCount",
+ "%d", opt_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <options> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"options");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf (key, sizeof (key), "%s.option.", prefix);
+
+ tmp.key = key;
+ tmp.writer = writer;
+ ret = dict_foreach (dict, _output_vol_info_option, &tmp);
+ if (ret)
+ goto out;
+
+ /* </options> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = 0;
+ int count = 0;
+ char *volname = NULL;
+ char *volume_id = NULL;
+ char *uuid = NULL;
+ int type = 0;
+ int status = 0;
+ int brick_count = 0;
+ int dist_count = 0;
+ int stripe_count = 0;
+ int replica_count = 0;
+ int transport = 0;
+ char *brick = NULL;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 1;
+ char *caps = NULL;
+ int k __attribute__((unused)) = 0;
+ char *snap_volume = NULL;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.name", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.volume_id", i);
+ ret = dict_get_str (dict, key, &volume_id);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"id",
+ "%s", volume_id);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.snap_volume", i);
+ ret = dict_get_str (dict, key, &snap_volume);
+ if (ret)
+ goto out;
+ if (snap_volume) {
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"snapVol",
+ "%s", snap_volume);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_status_str[status]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick_count", i);
+ ret = dict_get_int32 (dict, key, &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"brickCount",
+ "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.dist_count", i);
+ ret = dict_get_int32 (dict, key, &dist_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"distCount",
+ "%d", dist_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.stripe_count", i);
+ ret = dict_get_int32 (dict, key, &stripe_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"stripeCount",
+ "%d", stripe_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.replica_count", i);
+ ret = dict_get_int32 (dict, key, &replica_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"replicaCount",
+ "%d", replica_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.type", i);
+ ret = dict_get_int32 (dict, key, &type);
+ if (ret)
+ goto out;
+ /* For Distributed-(stripe,replicate,stripe-replicate) types */
+ if ((type > 0) && (dist_count < brick_count))
+ type += 3;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"typeStr",
+ "%s",
+ cli_vol_type_str[type]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.transport", i);
+ ret = dict_get_int32 (dict, key, &transport);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"transport",
+ "%d", transport);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+#ifdef HAVE_BD_XLATOR
+ /* <xlators> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"xlators");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (k = 0; ; k++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),"volume%d.xlator%d", i, k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+
+ /* <xlator> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"xlator");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"name", "%s", caps);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <capabilities> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)
+ "capabilities");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (j = 0; ; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d.caps%d", i, k, j);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"capability",
+ "%s", caps);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ /* </capabilities> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </xlator> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ ret = xmlTextWriterFullEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </xlators> */
+#else
+ caps = 0; /* Avoid compiler warnings when BD not enabled */
+#endif
+ j = 1;
+
+ /* <bricks> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"bricks");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ while (j <= brick_count) {
+ ret = xmlTextWriterStartElement
+ (local->writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.uuid",
+ i, j);
+ ret = dict_get_str (dict, key, &uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatAttribute
+ (local->writer, (xmlChar *)"uuid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d", i, j);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatString
+ (local->writer, "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ j++;
+ }
+ /* </bricks> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", i);
+ ret = cli_xml_output_vol_info_options (local->writer, dict,
+ key);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (volname) {
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = gf_strdup (volname);
+ local->vol_count += count;
+ }
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ GF_ASSERT (local);
+
+ ret = cli_begin_xml_output (&(local->writer), &(local->doc));
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (local->writer, op_ret, op_errno,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volInfo> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volInfo");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volumes> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* Init vol count */
+ local->vol_count = 0;
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_info_end (cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ GF_ASSERT (local);
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"count",
+ "%d", local->vol_count);
+
+ /* </volumes> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volInfo> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (local->writer, local->doc);
+
+out:
+ gf_log ("cli", GF_LOG_ERROR, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
+ int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int64_t size = 0;
+ int64_t limit_value = 0;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int len = 0;
+ char *size_str = NULL;
+ char path[PATH_MAX] = {0,};
+ char ret_str[1024] = {0,};
+ char value[1024] = {0,};
+ char mountdir[] = "/tmp/mountXXXXXX";
+ char abspath[PATH_MAX] = {0,};
+ runner_t runner = {0,};
+
+ GF_ASSERT (volname);
+ GF_ASSERT (limit_list);
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volQuota> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volQuota");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!limit_list)
+ goto cont;
+
+ len = strlen (limit_list);
+ if (len == 0)
+ goto cont;
+
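+ /* Mount the volume at a temporary directory so that the
+ * trusted.limit.list xattr can be queried for each path below. */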
+ if (mkdtemp (mountdir) == NULL) {
+ gf_log ("cli", GF_LOG_ERROR, "failed to create a temporary"
+ " mount directory");
+ ret = -1;
+ goto out;
+ }
+
+ ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",
+ "--volfile-id", volname, "-l",
+ DEFAULT_LOG_FILE_DIRECTORY"/quota-list-xml.log",
+ mountdir, NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "failed to mount glusterfs client");
+ ret = -1;
+ goto rm_dir;
+ }
+
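+ /* limit_list is a comma-separated list of "<path>:<limit>" pairs;
+ * each iteration parses one pair and emits a <quota> element. */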
+ while (i < len) {
+ j = 0;
+ k = 0;
+ size = 0;
+
+ while (limit_list[i] != ':')
+ path[k++] = limit_list[i++];
+ path[k] = '\0';
+
+ i++;
+
+ while (limit_list[i] != ',' && limit_list[i] != '\0')
+ value[j++] = limit_list[i++];
+ i++;
+
+ snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
+ ret = sys_lgetxattr (abspath, "trusted.limit.list",
+ (void *)ret_str, sizeof (ret_str));
+ if (ret >= 0) {
+ sscanf (ret_str, "%"SCNd64",%"SCNd64, &size,
+ &limit_value);
+ size_str = gf_uint64_2human_readable ((uint64_t)size);
+ }
+
+ /* <quota> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"quota");
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"path", "%s", path);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"limit", "%s", value);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ if (size_str) {
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"size", "%s", size_str);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+ GF_FREE (size_str);
+ size_str = NULL;
+ } else {
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"size", "%"PRId64, size);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+ }
+
+ /* </quota> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ }
+
+unmount:
+ runinit (&runner);
+ runner_add_args (&runner, "umount",
+#if GF_LINUX_HOST_OS
+ "-l",
+#endif
+ mountdir, NULL);
+ ret = runner_run_reuse (&runner);
+ if (ret)
+ runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
+ runner_end (&runner);
+
+rm_dir:
+ rmdir (mountdir);
+
+cont:
+ /* </volQuota> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ GF_FREE (size_str);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *uuid = NULL;
+ char *hostname = NULL;
+ int connected = 0;
+ int state_id = 0;
+ char *state_str = NULL;
+ int i = 1;
+ char key[1024] = {0,};
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <peerStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"peerStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!dict)
+ goto cont;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ while (i <= count) {
+ /* <peer> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"peer");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"uuid",
+ "%s", uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"connected",
+ "%d", connected);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.stateId", i);
+ ret = dict_get_int32 (dict, key, &state_id);
+ if (!ret) {
+ /* stateId is optional; write the element only when the key is present */
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"state", "%d", state_id);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.state", i);
+ ret = dict_get_str (dict, key, &state_str);
+ if (!ret) {
+ /* the state string is optional; write the element only when the key is present */
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"stateStr", "%s", state_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </peer> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ i++;
+ }
+
+cont:
+ /* </peerStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+/* Used for rebalance stop/status, remove-brick status */
+int
+cli_xml_output_vol_rebalance_status (xmlTextWriterPtr writer, dict_t *dict,
+ enum gf_task_types task_type)
+{
+ int ret = -1;
+ int count = 0;
+ char *node_name = NULL;
+ char *node_uuid = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookups = 0;
+ int status_rcd = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ uint64_t total_files = 0;
+ uint64_t total_size = 0;
+ uint64_t total_lookups = 0;
+ uint64_t total_failures = 0;
+ uint64_t total_skipped = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int overall_status = -1;
+ double elapsed = 0;
+ double overall_elapsed = 0;
+
+ if (!dict) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nodeCount",
+ "%d", count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ while (i < count) {
+ i++;
+
+ /* <node> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"nodeName",
+ "%s", node_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "node-uuid-%d", i);
+ ret = dict_get_str (dict, key, &node_uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"id",
+ "%s", node_uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "files-%d", i);
+ ret = dict_get_uint64 (dict, key, &files);
+ if (ret)
+ goto out;
+ total_files += files;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"files",
+ "%"PRIu64, files);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "size-%d", i);
+ ret = dict_get_uint64 (dict, key, &size);
+ if (ret)
+ goto out;
+ total_size += size;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"size",
+ "%"PRIu64,size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "lookups-%d", i);
+ ret = dict_get_uint64 (dict, key, &lookups);
+ if (ret)
+ goto out;
+ total_lookups += lookups;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"lookups",
+ "%"PRIu64, lookups);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &failures);
+ if (ret)
+ goto out;
+ total_failures += failures;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"failures",
+ "%"PRIu64, failures);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* The dict does not carry a skipped-%d key for remove-brick, so the
+ failure count is reused as the skipped count in that case,
+ mirroring the logic of the non-XML CLI output */
+ if (task_type == GF_TASK_TYPE_REBALANCE) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "skipped-%d", i);
+ }
+ else {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "failures-%d", i);
+ }
+
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ goto out;
+ total_skipped += skipped;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"skipped",
+ "%"PRIu64, skipped);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "status-%d", i);
+ ret = dict_get_int32 (dict, key, &status_rcd);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"status",
+ "%d", status_rcd);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[status_rcd]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "run-time-%d", i);
+ ret = dict_get_double (dict, key, &elapsed);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"runtime",
+ "%.2f", elapsed);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (elapsed > overall_elapsed) {
+ overall_elapsed = elapsed;
+ }
+
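+ /* Fold this node's status into the overall status: a node that is
+ * not COMPLETE overrides an overall status that is COMPLETE or has
+ * a lower value. */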
+ if (-1 == overall_status)
+ overall_status = status_rcd;
+ else if ((GF_DEFRAG_STATUS_COMPLETE == overall_status ||
+ status_rcd > overall_status) &&
+ (status_rcd != GF_DEFRAG_STATUS_COMPLETE))
+ overall_status = status_rcd;
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </node> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* Aggregate status */
+ /* <aggregate> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"aggregate");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"files",
+ "%"PRIu64, total_files);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"size",
+ "%"PRIu64, total_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"lookups",
+ "%"PRIu64, total_lookups);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"failures",
+ "%"PRIu64, total_failures);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"skipped",
+ "%"PRIu64, total_skipped);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"status",
+ "%d", overall_status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[overall_status]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"runtime",
+ "%.2f", overall_elapsed);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </aggregate> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRebalance> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRebalance");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if ((GF_DEFRAG_CMD_STOP == op) || (GF_DEFRAG_CMD_STATUS == op)) {
+ ret = cli_xml_output_vol_rebalance_status (writer, dict,
+ GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ goto out;
+ }
+
+ /* </volRebalance> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRemoveBrick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRemoveBrick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (status_op) {
+ ret = cli_xml_output_vol_rebalance_status (writer, dict,
+ GF_TASK_TYPE_REMOVE_BRICK);
+ if (ret)
+ goto out;
+ }
+
+ /* </volRemoveBrick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ int status = 0;
+ uint64_t files = 0;
+ char *current_file = 0;
+ char *task_id_str = NULL;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volReplaceBrick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volReplaceBrick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (GF_REPLACE_OP_STATUS == op) {
+ ret = dict_get_int32 (dict, "status", &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_uint64 (dict, "files", &files);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"files",
+ "%"PRIu64, files);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (status)
+ goto cont;
+
+ ret = dict_get_str (dict, "current_file", &current_file);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"currentFile",
+ "%s", current_file);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+cont:
+ /* </volReplaceBrick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_create (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <volCreate> */
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"volCreate");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *) "name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volume-id", &volid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"id",
+ "%s", volid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volCreate> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_generic_volume (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ GF_ASSERT (op);
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <"op"> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *) "name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "vol-id", &volid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"id",
+ "%s", volid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </"op"> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_gsync_status (dict_t *dict, xmlTextWriterPtr writer)
+{
+ char master_key[PATH_MAX] = "";
+ char slave_key[PATH_MAX] = "";
+ char status_key[PATH_MAX] = "";
+ char node_key[PATH_MAX] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ char *status = NULL;
+ char *node = NULL;
+ int ret = -1;
+ int gsync_count = 0;
+ int i = 1;
+
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (ret)
+ goto out;
+
+ for (i=1; i <= gsync_count; i++) {
+ snprintf (node_key, sizeof(node_key), "node%d", i);
+ snprintf (master_key, sizeof(master_key), "master%d", i);
+ snprintf (slave_key, sizeof(slave_key), "slave%d", i);
+ snprintf (status_key, sizeof(status_key), "status%d", i);
+
+ ret = dict_get_str (dict, node_key, &node);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, master_key, &master);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, slave_key, &slave);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, status_key, &status);
+ if (ret)
+ goto out;
+
+ /* <pair> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"pair");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"node",
+ "%s", node);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"master",
+ "%s", master);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"slave",
+ "%s", slave);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"status",
+ "%s", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </pair> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ }
+
+out:
+ gf_log ("cli",GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int type = 0;
+
+ GF_ASSERT (dict);
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <geoRep> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"geoRep");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"master",
+ "%s", master);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"slave",
+ "%s", slave);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ break;
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ ret = cli_xml_output_vol_gsync_status(dict, writer);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ /* </geoRep> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli",GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
diff --git a/cli/src/cli.c b/cli/src/cli.c
index 932869038..91b315ff1 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -99,43 +89,6 @@ rpc_clnt_prog_t *cli_rpc_prog;
extern struct rpc_clnt_program cli_prog;
-
-
-
-static char *
-generate_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (gethostname (hostname, 256) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%"
-#ifdef GF_DARWIN_HOST_OS
- PRId32,
-#else
- "ld",
-#endif
- hostname, getpid(), now_str, tv.tv_usec);
-
- return gf_strdup (tmp_str);
-}
-
static int
glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
{
@@ -145,13 +98,13 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
xlator_mem_acct_init (THIS, cli_mt_end);
- ctx->process_uuid = generate_uuid ();
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
if (!ctx->process_uuid)
return -1;
ctx->page_size = 128 * GF_UNIT_KB;
- ctx->iobuf_pool = iobuf_pool_new (8 * GF_UNIT_MB, ctx->page_size);
+ ctx->iobuf_pool = iobuf_pool_new ();
if (!ctx->iobuf_pool)
return -1;
@@ -164,22 +117,33 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
if (!pool)
return -1;
- /* frame_mem_pool size 112 * 16k */
- pool->frame_mem_pool = mem_pool_new (call_frame_t, 16384);
-
+ /* frame_mem_pool size 112 * 64 */
+ pool->frame_mem_pool = mem_pool_new (call_frame_t, 32);
if (!pool->frame_mem_pool)
return -1;
- /* stack_mem_pool size 256 * 8k */
- pool->stack_mem_pool = mem_pool_new (call_stack_t, 8192);
+ /* stack_mem_pool size 256 * 128 */
+ pool->stack_mem_pool = mem_pool_new (call_stack_t, 16);
if (!pool->stack_mem_pool)
return -1;
- ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 16);
if (!ctx->stub_mem_pool)
return -1;
+ ctx->dict_pool = mem_pool_new (dict_t, 32);
+ if (!ctx->dict_pool)
+ return -1;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t, 512);
+ if (!ctx->dict_pair_pool)
+ return -1;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, 512);
+ if (!ctx->dict_data_pool)
+ return -1;
+
INIT_LIST_HEAD (&pool->all_frames);
LOCK_INIT (&pool->lock);
ctx->pool = pool;
@@ -199,12 +163,13 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
static int
-logging_init (struct cli_state *state)
+logging_init (glusterfs_ctx_t *ctx, struct cli_state *state)
{
char *log_file = state->log_file ? state->log_file :
DEFAULT_CLI_LOG_FILE_DIRECTORY "/cli.log";
- if (gf_log_init (log_file) == -1) {
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init (ctx, log_file, NULL) == -1) {
fprintf (stderr, "ERROR: failed to open logfile %s\n",
log_file);
return -1;
@@ -273,6 +238,8 @@ cli_submit_request (void *req, call_frame_t *frame,
out:
if (new_iobref)
iobref_unref (iobref);
+ if (iobuf)
+ iobuf_unref (iobuf);
return ret;
}
@@ -317,16 +284,44 @@ cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
return ret;
}
+
+/*
+ * ret: 0: option successfully processed
+ * 1: signalling end of option list
+ * -1: unknown option or other issue
+ */
int
cli_opt_parse (char *opt, struct cli_state *state)
{
char *oarg;
+ if (strcmp (opt, "") == 0)
+ return 1;
+
if (strcmp (opt, "version") == 0) {
- puts (argp_program_version);
+ cli_out ("%s", argp_program_version);
exit (0);
}
+ if (strcmp (opt, "print-logdir") == 0) {
+ cli_out ("%s", DEFAULT_LOG_FILE_DIRECTORY);
+ exit (0);
+ }
+
+ if (strcmp (opt, "print-statedumpdir") == 0) {
+ cli_out ("%s", DEFAULT_VAR_RUN_DIRECTORY);
+ exit (0);
+ }
+
+ if (strcmp (opt, "xml") == 0) {
+#if (HAVE_LIB_XML)
+ state->mode |= GLUSTER_MODE_XML;
+#else
+ cli_err ("XML output not supported. Ignoring '--xml' option");
+#endif
+ return 0;
+ }
+
oarg = strtail (opt, "mode=");
if (oarg) {
if (strcmp (oarg, "script") == 0) {
@@ -358,6 +353,12 @@ cli_opt_parse (char *opt, struct cli_state *state)
return 0;
}
+ oarg = strtail (opt, "glusterd-sock=");
+ if (oarg) {
+ state->glusterd_sock = oarg;
+ return 0;
+ }
+
return -1;
}
@@ -385,9 +386,16 @@ parse_cmdline (int argc, char *argv[], struct cli_state *state)
state->argc--;
/* argv shifted, next check should be at i again */
i--;
+ if (ret == 1) {
+ /* end of cli options */
+ ret = 0;
+ break;
+ }
}
}
+ state->argv[state->argc] = NULL;
+
return ret;
}
@@ -412,7 +420,6 @@ cli_state_init (struct cli_state *state)
int ret = 0;
- state->remote_host = "localhost";
state->log_level = -1;
tree = &state->tree;
@@ -432,11 +439,35 @@ cli_usage_out (const char *usage)
if (!usage || usage[0] == '\0')
return -1;
- cli_out ("Usage: %s", usage);
+ cli_err ("Usage: %s", usage);
return 0;
}
int
+_cli_err (const char *fmt, ...)
+{
+ struct cli_state *state = NULL;
+ va_list ap;
+ int ret = 0;
+
+ state = global_state;
+
+ va_start (ap, fmt);
+
+#ifdef HAVE_READLINE
+ if (state->rl_enabled && !state->rl_processing)
+ return cli_rl_err(state, fmt, ap);
+#endif
+
+ ret = vfprintf (stderr, fmt, ap);
+ fprintf (stderr, "\n");
+ va_end (ap);
+
+ return ret;
+}
+
+
+int
_cli_out (const char *fmt, ...)
{
struct cli_state *state = NULL;
@@ -475,23 +506,46 @@ cli_rpc_init (struct cli_state *state)
if (!options)
goto out;
- ret = dict_set_str (options, "remote-host", state->remote_host);
- if (ret)
- goto out;
-
- if (state->remote_port)
- port = state->remote_port;
+ /* Connect to glusterd using the specified method, giving
+ * preference to a unix socket connection. If nothing is specified,
+ * connect to the default glusterd socket.
+ */
+ if (state->glusterd_sock) {
+ gf_log ("cli", GF_LOG_INFO, "Connecting to glusterd using "
+ "sockfile %s", state->glusterd_sock);
+ ret = rpc_transport_unix_options_build (&options,
+ state->glusterd_sock,
+ 0);
+ if (ret)
+ goto out;
+ } else if (state->remote_host) {
+ gf_log ("cli", GF_LOG_INFO, "Connecting to remote glusterd at "
+ "%s", state->remote_host);
+ ret = dict_set_str (options, "remote-host", state->remote_host);
+ if (ret)
+ goto out;
- ret = dict_set_int32 (options, "remote-port", port);
- if (ret)
- goto out;
+ if (state->remote_port)
+ port = state->remote_port;
- ret = dict_set_str (options, "transport.address-family", "inet/inet6");
- if (ret)
- goto out;
+ ret = dict_set_int32 (options, "remote-port", port);
+ if (ret)
+ goto out;
- rpc = rpc_clnt_new (options, this->ctx, this->name);
+ ret = dict_set_str (options, "transport.address-family",
+ "inet");
+ if (ret)
+ goto out;
+ } else {
+ gf_log ("cli", GF_LOG_DEBUG, "Connecting to glusterd using "
+ "default socket");
+ ret = rpc_transport_unix_options_build
+ (&options, DEFAULT_GLUSTERD_SOCKFILE, 0);
+ if (ret)
+ goto out;
+ }
+ rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
if (!rpc)
goto out;
@@ -501,7 +555,7 @@ cli_rpc_init (struct cli_state *state)
goto out;
}
- rpc_clnt_start (rpc);
+ ret = rpc_clnt_start (rpc);
out:
if (ret) {
if (rpc)
@@ -525,76 +579,15 @@ void
cli_local_wipe (cli_local_t *local)
{
if (local) {
+ GF_FREE (local->get_vol.volname);
+ if (local->dict)
+ dict_unref (local->dict);
GF_FREE (local);
}
return;
}
-/* If the path exists use realpath(3) to handle extra slashes and to resolve
- * symlinks else strip the extra slashes in the path and return */
-
-int
-cli_canonicalize_path (char *path)
-{
- struct stat sb = {0};
- int ret = -1;
- char *tmppath = NULL;
- char *dir = NULL;
- char *tmpstr = NULL;
- int path_len = 0;
-
- if (!path)
- return ret;
-
- ret = stat (path, &sb);
- if (ret == -1) {
- /* Strip the extra slashes and return */
- tmppath = gf_strdup (path);
- if (tmppath == NULL) {
- ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Out of memory.");
- goto out;
- }
- bzero (path, strlen(path));
- path[0] = '/';
- dir = strtok_r(tmppath, "/", &tmpstr);
- while (dir) {
- strncpy ((path + path_len + 1), dir, strlen(dir));
- path_len = strlen (path);
- dir = strtok_r(NULL, "/", &tmpstr);
- if (dir)
- strncpy((path + path_len), "/", 1);
- }
- if (path_len == 0)
- path[1] = '\0';
- else
- path[path_len] = '\0';
- ret = 0;
- goto out;
- } else {
- tmppath = gf_strdup(path);
- if (tmppath == NULL) {
- ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Out of memory.");
- goto out;
- }
- if (realpath (tmppath, path) == NULL) {
- cli_out ("Path manipulation failed: %s",
- strerror(errno));
- gf_log ("cli", GF_LOG_ERROR, "Path manipulation "
- "failed: %s", strerror(errno));
- ret = -1;
- goto out;
- }
- ret = 0;
- }
-out:
- if (tmppath)
- GF_FREE(tmppath);
- return ret;
-}
-
struct cli_state *global_state;
int
@@ -604,13 +597,19 @@ main (int argc, char *argv[])
int ret = -1;
glusterfs_ctx_t *ctx = NULL;
- ret = glusterfs_globals_init ();
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return ENOMEM;
+
+#ifdef DEBUG
+ gf_mem_acct_enable_set (ctx);
+#endif
+
+ ret = glusterfs_globals_init (ctx);
if (ret)
return ret;
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- return ENOMEM;
+ THIS->ctx = ctx;
ret = glusterfs_ctx_defaults_init (ctx);
if (ret)
@@ -627,7 +626,7 @@ main (int argc, char *argv[])
if (ret)
goto out;
- ret = logging_init (&state);
+ ret = logging_init (ctx, &state);
if (ret)
goto out;
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 0a2fdb54b..8daa4b741 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_H__
#define __CLI_H__
@@ -28,16 +18,24 @@
#include "rpc-clnt.h"
#include "glusterfs.h"
#include "protocol-common.h"
+#include "logging.h"
+
+#include "cli1-xdr.h"
+
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+#endif
#define DEFAULT_EVENT_POOL_SIZE 16384
#define CLI_GLUSTERD_PORT 24007
#define CLI_DEFAULT_CONN_TIMEOUT 120
#define CLI_DEFAULT_CMD_TIMEOUT 120
-#define CLI_TOP_CMD_TIMEOUT 300 //Longer timeout for volume top
+#define CLI_TEN_MINUTES_TIMEOUT 600 //Longer timeout for volume top
#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
#define CLI_VOL_STATUS_BRICK_LEN 55
#define CLI_TAB_LENGTH 8
-#define CLI_BRICK_STATUS_LINE_LEN 75
+#define CLI_BRICK_STATUS_LINE_LEN 78
enum argp_option_keys {
ARGP_DEBUG_KEY = 133,
@@ -46,12 +44,16 @@ enum argp_option_keys {
#define GLUSTER_MODE_SCRIPT (1 << 0)
#define GLUSTER_MODE_ERR_FATAL (1 << 1)
-
+#define GLUSTER_MODE_XML (1 << 2)
struct cli_state;
struct cli_cmd_word;
struct cli_cmd_tree;
struct cli_cmd;
+extern char *cli_vol_type_str[];
+extern char *cli_vol_status_str[];
+extern char *cli_vol_task_status_str[];
+
typedef int (cli_cmd_cbk_t)(struct cli_state *state,
struct cli_cmd_word *word,
const char **words,
@@ -113,49 +115,66 @@ struct cli_state {
char *log_file;
gf_loglevel_t log_level;
+
+ char *glusterd_sock;
};
struct cli_local {
- union {
- struct {
- dict_t *dict;
- } create_vol;
-
- struct {
- char *volname;
- int flags;
- } start_vol;
-
- struct {
- char *volname;
- int flags;
- } stop_vol;
-
- struct {
- char *volname;
- } delete_vol;
-
- struct {
- char *volname;
- int cmd;
- } defrag_vol;
-
- struct {
- char *volname;
- dict_t *dict;
- } replace_brick;
-
- struct {
- char *volname;
- int flags;
- } get_vol;
-
- struct {
- char *volname;
- }heal_vol;
- } u;
+ struct {
+ char *volname;
+ int flags;
+ } get_vol;
+
+ dict_t *dict;
+ const char **words;
+ /* Marker for volume status all */
+ gf_boolean_t all;
+#if (HAVE_LIB_XML)
+ xmlTextWriterPtr writer;
+ xmlDocPtr doc;
+ int vol_count;
+#endif
};
+struct gf_cli_gsync_detailed_status_ {
+ char *node;
+ char *master;
+ char *slave;
+ char *health;
+ char *uptime;
+ char *files_syncd;
+ char *files_pending;
+ char *bytes_pending;
+ char *deletes_pending;
+};
+
+struct cli_volume_status {
+ int port;
+ int online;
+ uint64_t block_size;
+ uint64_t total_inodes;
+ uint64_t free_inodes;
+ char *brick;
+ char *pid_str;
+ char *free;
+ char *total;
+#ifdef GF_LINUX_HOST_OS
+ char *fs_name;
+ char *mount_options;
+ char *device;
+ char *inode_size;
+#endif
+};
+
+struct snap_config_opt_vals_ {
+ char *op_name;
+ char *question;
+};
+
+typedef struct gf_cli_gsync_detailed_status_ gf_cli_gsync_status_t;
+
+typedef struct cli_volume_status cli_volume_status_t;
+
typedef struct cli_local cli_local_t;
typedef ssize_t (*cli_serialize_t) (struct iovec outmsg, void *args);
@@ -176,10 +195,12 @@ int cli_cmd_process_line (struct cli_state *state, const char *line);
int cli_rl_enable (struct cli_state *state);
int cli_rl_out (struct cli_state *state, const char *fmt, va_list ap);
+int cli_rl_err (struct cli_state *state, const char *fmt, va_list ap);
int cli_usage_out (const char *usage);
int _cli_out (const char *fmt, ...);
+int _cli_err (const char *fmt, ...);
#define cli_out(fmt...) do { \
FMT_WARN (fmt); \
@@ -188,6 +209,13 @@ int _cli_out (const char *fmt, ...);
\
} while (0)
+#define cli_err(fmt...) do { \
+ FMT_WARN (fmt); \
+ \
+ _cli_err(fmt); \
+ \
+ } while (0)
+
int
cli_submit_request (void *req, call_frame_t *frame,
rpc_clnt_prog_t *prog,
@@ -209,7 +237,7 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **opt);
int32_t
cli_cmd_volume_set_parse (const char **words, int wordcount,
- dict_t **options);
+ dict_t **options, char **op_errstr);
int32_t
cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
@@ -233,6 +261,9 @@ cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options)
int32_t
cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
dict_t **options);
+int32_t
+cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
+ dict_t **options);
cli_local_t * cli_local_get ();
@@ -249,9 +280,6 @@ int
cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
void *data);
-int
-cli_canonicalize_path (char *path);
-
int32_t
cli_cmd_volume_profile_parse (const char **words, int wordcount,
dict_t **options);
@@ -268,8 +296,105 @@ cli_cmd_volume_status_parse (const char **words, int wordcount,
dict_t **options);
int
-cli_print_brick_status (char *brick, int port, int online, int pid);
+cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_cmd_volume_defrag_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_print_brick_status (cli_volume_status_t *status);
+
+void
+cli_print_detailed_status (cli_volume_status_t *status);
+
+int
+cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status);
void
cli_print_line (int len);
+
+int
+cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_dict (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_end (cli_local_t *local);
+
+int
+cli_xml_output_vol_status (cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_info_end (cli_local_t *local);
+
+int
+cli_xml_output_vol_info (cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
+ int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_create (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_generic_volume (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_vol_status_tasks_detail (cli_local_t *local, dict_t *dict);
+
+char *
+is_server_debug_xlator (void *myframe);
+
+int32_t
+cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
+ struct cli_state *state);
+
#endif /* __CLI_H__ */
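
The header changes above replace the old per-field cli_print_brick_status() with a variant that takes the new cli_volume_status_t aggregate, and add a cli_err() macro that mirrors cli_out() but routes through _cli_err(). A minimal sketch of a hypothetical caller, assuming the updated cli.h is on the include path and also pulls in dict_t; none of this helper code is part of the patch:

    #include "cli.h"

    /* Illustrative only: print one brick's status from a status-reply
     * dict at index i, using the declarations introduced above. */
    static int
    show_brick_status (dict_t *dict, int i)
    {
            cli_volume_status_t status = {0,};
            int                 ret    = -1;

            /* cli_get_detail_status() fills the struct from the reply dict */
            ret = cli_get_detail_status (dict, i, &status);
            if (ret) {
                    cli_err ("Failed to read status of brick %d", i);
                    return ret;
            }

            cli_print_brick_status (&status);    /* one-line summary */
            cli_print_detailed_status (&status); /* block/inode details */

            return 0;
    }
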
diff --git a/cli/src/input.c b/cli/src/input.c
index a88d35874..a8ea46c6d 100644
--- a/cli/src/input.c
+++ b/cli/src/input.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -44,7 +34,7 @@ cli_batch (void *d)
ret = cli_cmd_process (state, state->argc, state->argv);
gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
- exit (ret);
+ exit (-ret);
return NULL;
}
@@ -71,11 +61,11 @@ cli_input (void *d)
if (len > 0 && cmd[len - 1] == '\n') //strip trailing \n
cmd[len - 1] = '\0';
ret = cli_cmd_process_line (state, cmd);
- if (ret == -1 && state->mode & GLUSTER_MODE_ERR_FATAL)
+ if (ret != 0 && state->mode & GLUSTER_MODE_ERR_FATAL)
break;
}
- exit (ret);
+ exit (-ret);
return NULL;
}
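
Both exit paths above now negate the return value. CLI internals follow the usual GlusterFS convention of returning 0 on success and a negative value on failure, but a process exit status must be a small non-negative number, so exit(-ret) maps ret = -1 to exit status 1 rather than the 255 that exit(-1) would yield. A toy illustration of that mapping:

    #include <stdlib.h>

    int
    main (void)
    {
            int ret = -1;   /* typical failure value from a cli_cmd_* routine */

            exit (-ret);    /* the shell sees $? == 1, not 255 */
    }
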
diff --git a/cli/src/registry.c b/cli/src/registry.c
index 5b63e82b1..c4abe3be0 100644
--- a/cli/src/registry.c
+++ b/cli/src/registry.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -387,7 +377,7 @@ cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd)
char **tokens = NULL;
int ret = 0;
- GF_ASSERT (cmd)
+ GF_ASSERT (cmd);
if (cmd->reg_cbk)
cmd->reg_cbk (cmd);
diff --git a/configure.ac b/configure.ac
index 2a34a61af..b3d1ed184 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,21 +1,12 @@
-dnl Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-
-dnl This file is part of GlusterFS.
-dnl
-dnl GlusterFS is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 3 of the License, or
-dnl (at your option) any later version.
+dnl Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+dnl This file is part of GlusterFS.
dnl
-dnl GlusterFS is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-dnl GNU General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+dnl This file is licensed to you under your choice of the GNU Lesser
+dnl General Public License, version 3 or any later version (LGPLv3 or
+dnl later), or the GNU General Public License, version 2 (GPLv2), in all
+dnl cases as published by the Free Software Foundation.
-AC_INIT([glusterfs],[3git],[gluster-users@gluster.org])
+AC_INIT([glusterfs],[3git],[gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
AM_INIT_AUTOMAKE
@@ -33,13 +24,15 @@ if libtool --help 2>&1 | grep -q quiet; then
AM_LIBTOOLFLAGS="--quiet";
fi
-AM_CONFIG_HEADER([config.h])
+AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([Makefile
- libglusterfs/Makefile
- libglusterfs/src/Makefile
- glusterfsd/Makefile
- glusterfsd/src/Makefile
+ libglusterfs/Makefile
+ libglusterfs/src/Makefile
+ geo-replication/src/peer_gsec_create
+ geo-replication/src/peer_add_secret_pub
+ glusterfsd/Makefile
+ glusterfsd/src/Makefile
rpc/Makefile
rpc/rpc-lib/Makefile
rpc/rpc-lib/src/Makefile
@@ -50,111 +43,162 @@ AC_CONFIG_FILES([Makefile
rpc/rpc-transport/rdma/src/Makefile
rpc/xdr/Makefile
rpc/xdr/src/Makefile
- xlators/Makefile
- xlators/mount/Makefile
- xlators/mount/fuse/Makefile
- xlators/mount/fuse/src/Makefile
- xlators/mount/fuse/utils/mount.glusterfs
- xlators/mount/fuse/utils/mount_glusterfs
- xlators/mount/fuse/utils/Makefile
- xlators/storage/Makefile
- xlators/storage/posix/Makefile
- xlators/storage/posix/src/Makefile
- xlators/cluster/Makefile
- xlators/cluster/afr/Makefile
- xlators/cluster/afr/src/Makefile
- xlators/cluster/stripe/Makefile
- xlators/cluster/stripe/src/Makefile
- xlators/cluster/dht/Makefile
- xlators/cluster/dht/src/Makefile
- xlators/performance/Makefile
- xlators/performance/write-behind/Makefile
- xlators/performance/write-behind/src/Makefile
- xlators/performance/read-ahead/Makefile
- xlators/performance/read-ahead/src/Makefile
- xlators/performance/io-threads/Makefile
- xlators/performance/io-threads/src/Makefile
- xlators/performance/io-cache/Makefile
- xlators/performance/io-cache/src/Makefile
- xlators/performance/symlink-cache/Makefile
- xlators/performance/symlink-cache/src/Makefile
- xlators/performance/quick-read/Makefile
- xlators/performance/quick-read/src/Makefile
- xlators/performance/stat-prefetch/Makefile
- xlators/performance/stat-prefetch/src/Makefile
- xlators/debug/Makefile
- xlators/debug/trace/Makefile
- xlators/debug/trace/src/Makefile
- xlators/debug/error-gen/Makefile
- xlators/debug/error-gen/src/Makefile
- xlators/debug/io-stats/Makefile
- xlators/debug/io-stats/src/Makefile
- xlators/protocol/Makefile
- xlators/protocol/auth/Makefile
- xlators/protocol/auth/addr/Makefile
- xlators/protocol/auth/addr/src/Makefile
- xlators/protocol/auth/login/Makefile
- xlators/protocol/auth/login/src/Makefile
- xlators/protocol/client/Makefile
- xlators/protocol/client/src/Makefile
- xlators/protocol/server/Makefile
- xlators/protocol/server/src/Makefile
- xlators/features/Makefile
- xlators/features/locks/Makefile
- xlators/features/locks/src/Makefile
- xlators/features/trash/Makefile
- xlators/features/trash/src/Makefile
- xlators/features/quota/Makefile
- xlators/features/quota/src/Makefile
+ xlators/Makefile
+ xlators/mount/Makefile
+ xlators/mount/fuse/Makefile
+ xlators/mount/fuse/src/Makefile
+ xlators/mount/fuse/utils/mount.glusterfs
+ xlators/mount/fuse/utils/mount_glusterfs
+ xlators/mount/fuse/utils/Makefile
+ xlators/storage/Makefile
+ xlators/storage/posix/Makefile
+ xlators/storage/posix/src/Makefile
+ xlators/storage/bd/Makefile
+ xlators/storage/bd/src/Makefile
+ xlators/cluster/Makefile
+ xlators/cluster/afr/Makefile
+ xlators/cluster/afr/src/Makefile
+ xlators/cluster/stripe/Makefile
+ xlators/cluster/stripe/src/Makefile
+ xlators/cluster/dht/Makefile
+ xlators/cluster/dht/src/Makefile
+ xlators/performance/Makefile
+ xlators/performance/write-behind/Makefile
+ xlators/performance/write-behind/src/Makefile
+ xlators/performance/read-ahead/Makefile
+ xlators/performance/read-ahead/src/Makefile
+ xlators/performance/readdir-ahead/Makefile
+ xlators/performance/readdir-ahead/src/Makefile
+ xlators/performance/io-threads/Makefile
+ xlators/performance/io-threads/src/Makefile
+ xlators/performance/io-cache/Makefile
+ xlators/performance/io-cache/src/Makefile
+ xlators/performance/symlink-cache/Makefile
+ xlators/performance/symlink-cache/src/Makefile
+ xlators/performance/quick-read/Makefile
+ xlators/performance/quick-read/src/Makefile
+ xlators/performance/open-behind/Makefile
+ xlators/performance/open-behind/src/Makefile
+ xlators/performance/md-cache/Makefile
+ xlators/performance/md-cache/src/Makefile
+ xlators/debug/Makefile
+ xlators/debug/trace/Makefile
+ xlators/debug/trace/src/Makefile
+ xlators/debug/error-gen/Makefile
+ xlators/debug/error-gen/src/Makefile
+ xlators/debug/io-stats/Makefile
+ xlators/debug/io-stats/src/Makefile
+ xlators/protocol/Makefile
+ xlators/protocol/auth/Makefile
+ xlators/protocol/auth/addr/Makefile
+ xlators/protocol/auth/addr/src/Makefile
+ xlators/protocol/auth/login/Makefile
+ xlators/protocol/auth/login/src/Makefile
+ xlators/protocol/client/Makefile
+ xlators/protocol/client/src/Makefile
+ xlators/protocol/server/Makefile
+ xlators/protocol/server/src/Makefile
+ xlators/features/Makefile
+ xlators/features/changelog/Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+ xlators/features/changelog/lib/src/Makefile
+ xlators/features/glupy/Makefile
+ xlators/features/glupy/src/Makefile
+ xlators/features/locks/Makefile
+ xlators/features/locks/src/Makefile
+ xlators/features/quota/Makefile
+ xlators/features/quota/src/Makefile
xlators/features/marker/Makefile
xlators/features/marker/src/Makefile
- xlators/features/marker/utils/Makefile
- xlators/features/marker/utils/src/Makefile
- xlators/features/marker/utils/syncdaemon/Makefile
- xlators/features/read-only/Makefile
- xlators/features/read-only/src/Makefile
- xlators/features/mac-compat/Makefile
- xlators/features/mac-compat/src/Makefile
- xlators/features/quiesce/Makefile
- xlators/features/quiesce/src/Makefile
- xlators/encryption/Makefile
- xlators/encryption/rot-13/Makefile
- xlators/encryption/rot-13/src/Makefile
+ xlators/features/read-only/Makefile
+ xlators/features/read-only/src/Makefile
+ xlators/features/compress/Makefile
+ xlators/features/compress/src/Makefile
+ xlators/features/mac-compat/Makefile
+ xlators/features/mac-compat/src/Makefile
+ xlators/features/quiesce/Makefile
+ xlators/features/quiesce/src/Makefile
+ xlators/features/index/Makefile
+ xlators/features/index/src/Makefile
+ xlators/features/protect/Makefile
+ xlators/features/protect/src/Makefile
+ xlators/features/gfid-access/Makefile
+ xlators/features/gfid-access/src/Makefile
+ xlators/playground/Makefile
+ xlators/playground/template/Makefile
+ xlators/playground/template/src/Makefile
+ xlators/encryption/Makefile
+ xlators/encryption/rot-13/Makefile
+ xlators/encryption/rot-13/src/Makefile
+ xlators/encryption/crypt/Makefile
+ xlators/encryption/crypt/src/Makefile
+ xlators/features/qemu-block/Makefile
+ xlators/features/qemu-block/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
- cli/Makefile
- cli/src/Makefile
- doc/Makefile
- doc/examples/Makefile
- doc/hacker-guide/Makefile
- extras/Makefile
- extras/init.d/Makefile
- extras/init.d/glusterd.plist
- extras/init.d/glusterd-Debian
- extras/init.d/glusterd-Redhat
- extras/init.d/glusterd-SuSE
- extras/benchmarking/Makefile
- contrib/fuse-util/Makefile
xlators/nfs/Makefile
xlators/nfs/server/Makefile
xlators/nfs/server/src/Makefile
xlators/mgmt/Makefile
xlators/mgmt/glusterd/Makefile
xlators/mgmt/glusterd/src/Makefile
- glusterfs.spec])
+ cli/Makefile
+ cli/src/Makefile
+ doc/Makefile
+ extras/Makefile
+ extras/init.d/Makefile
+ extras/init.d/glusterd.plist
+ extras/init.d/glusterd-Debian
+ extras/init.d/glusterd-Redhat
+ extras/init.d/glusterd-SuSE
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/benchmarking/Makefile
+ extras/hook-scripts/Makefile
+ extras/ocf/Makefile
+ extras/ocf/glusterd
+ extras/ocf/volume
+ extras/LinuxRPM/Makefile
+ extras/geo-rep/Makefile
+ contrib/fuse-util/Makefile
+ contrib/uuid/uuid_types.h
+ glusterfs-api.pc
+ libgfchangelog.pc
+ api/Makefile
+ api/src/Makefile
+ api/examples/Makefile
+ api/examples/setup.py
+ geo-replication/Makefile
+ geo-replication/src/Makefile
+ geo-replication/syncdaemon/Makefile
+ glusterfs.spec])
AC_CANONICAL_HOST
AC_PROG_CC
+AC_DISABLE_STATIC
AC_PROG_LIBTOOL
+AC_ARG_WITH(pkgconfigdir,
+ [ --with-pkgconfigdir=DIR pkgconfig file in DIR @<:@LIBDIR/pkgconfig@:>@],
+ [pkgconfigdir=$withval],
+ [pkgconfigdir='${libdir}/pkgconfig'])
+AC_SUBST(pkgconfigdir)
+
AC_ARG_WITH(mountutildir,
[ --with-mountutildir=DIR mount helper utility in DIR @<:@/sbin@:>@],
[mountutildir=$withval],
[mountutildir='/sbin'])
AC_SUBST(mountutildir)
+AC_ARG_WITH(systemddir,
+ [ --with-systemddir=DIR systemd service files in DIR @<:@/usr/lib/systemd/system@:>@],
+ [systemddir=$withval],
+ [systemddir='/usr/lib/systemd/system'])
+AC_SUBST(systemddir)
+
AC_ARG_WITH(initdir,
[ --with-initdir=DIR init.d scripts in DIR @<:@/etc/init.d@:>@],
[initdir=$withval],
@@ -167,12 +211,35 @@ AC_ARG_WITH(launchddir,
[launchddir='/Library/LaunchDaemons'])
AC_SUBST(launchddir)
+AC_ARG_WITH([ocf],
+ [AS_HELP_STRING([--without-ocf], [build OCF-compliant cluster resource agents])],
+ ,
+ [OCF_SUBDIR='ocf'],
+ )
+AC_SUBST(OCF_SUBDIR)
+
# LEX needs a check
AC_PROG_LEX
if test "x${LEX}" != "xflex" -a "x${FLEX}" != "xlex"; then
AC_MSG_ERROR([Flex or lex required to build glusterfs.])
fi
+dnl
+dnl Word sizes...
+dnl
+AC_CHECK_SIZEOF(short)
+AC_CHECK_SIZEOF(int)
+AC_CHECK_SIZEOF(long)
+AC_CHECK_SIZEOF(long long)
+SIZEOF_SHORT=$ac_cv_sizeof_short
+SIZEOF_INT=$ac_cv_sizeof_int
+SIZEOF_LONG=$ac_cv_sizeof_long
+SIZEOF_LONG_LONG=$ac_cv_sizeof_long_long
+AC_SUBST(SIZEOF_SHORT)
+AC_SUBST(SIZEOF_INT)
+AC_SUBST(SIZEOF_LONG)
+AC_SUBST(SIZEOF_LONG_LONG)
+
# YACC needs a check
AC_PROG_YACC
if test "x${YACC}" = "xbyacc" -o "x${YACC}" = "xyacc" -o "x${YACC}" = "x"; then
@@ -181,8 +248,10 @@ fi
AC_CHECK_TOOL([LD],[ld])
+AC_CHECK_LIB([crypto], [MD5], , AC_MSG_ERROR([OpenSSL crypto library is required to build glusterfs]))
+
AC_CHECK_LIB([pthread], [pthread_mutex_init], , AC_MSG_ERROR([Posix threads library is required to build glusterfs]))
-
+
AC_CHECK_FUNC([dlopen], [has_dlopen=yes], AC_CHECK_LIB([dl], [dlopen], , AC_MSG_ERROR([Dynamic linking library required to build glusterfs])))
@@ -190,6 +259,10 @@ AC_CHECK_HEADERS([sys/xattr.h])
AC_CHECK_HEADERS([sys/extattr.h])
+AC_CHECK_HEADERS([openssl/md5.h])
+
+AC_CHECK_HEADERS([linux/falloc.h])
+
case $host_os in
darwin*)
if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 5; then
@@ -223,8 +296,8 @@ fi
# FUSE section
AC_ARG_ENABLE([fuse-client],
- AC_HELP_STRING([--disable-fuse-client],
- [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
+ AC_HELP_STRING([--disable-fuse-client],
+ [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
BUILD_FUSE_CLIENT=no
if test "x$enable_fuse_client" != "xno"; then
@@ -232,60 +305,153 @@ if test "x$enable_fuse_client" != "xno"; then
BUILD_FUSE_CLIENT="yes"
fi
+AC_ARG_ENABLE([bd-xlator],
+ AC_HELP_STRING([--enable-bd-xlator], [Build BD xlator]))
+
+if test "x$enable_bd_xlator" != "xno"; then
+ AC_CHECK_LIB([lvm2app],
+ [lvm_init,lvm_lv_from_name],
+ [HAVE_BD_LIB="yes"],
+ [HAVE_BD_LIB="no"])
+
+if test "x$HAVE_BD_LIB" = "xyes"; then
+ # lvm_lv_from_name() has been made public with lvm2-2.02.79
+ AC_CHECK_DECLS(
+ [lvm_lv_from_name],
+ [NEED_LVM_LV_FROM_NAME_DECL="no"],
+ [NEED_LVM_LV_FROM_NAME_DECL="yes"],
+ [[#include <lvm2app.h>]])
+ fi
+fi
+
+if test "x$enable_bd_xlator" = "xyes" -a "x$HAVE_BD_LIB" = "xno"; then
+ echo "BD xlator requested but required lvm2 development library not found."
+ exit 1
+fi
+
+BUILD_BD_XLATOR=no
+if test "x${enable-bd-xlator}" != "xno" -a "x${HAVE_BD_LIB}" = "xyes"; then
+ BUILD_BD_XLATOR=yes
+ AC_DEFINE(HAVE_BD_XLATOR, 1, [define if lvm2app library found and bd xlator
+ enabled])
+ if test "x$NEED_LVM_LV_FROM_NAME_DECL" = "xyes"; then
+ AC_DEFINE(NEED_LVM_LV_FROM_NAME_DECL, 1, [defined if lvm_lv_from_name()
+ was not found in the lvm2app.h header, but can be linked])
+ fi
+fi
+
+AM_CONDITIONAL([ENABLE_BD_XLATOR], [test x$BUILD_BD_XLATOR = xyes])
+
+# start encryption/crypt section
+
+AC_CHECK_HEADERS([openssl/cmac.h], [have_cmac_h=yes], [have_cmac_h=no])
+
+AC_ARG_ENABLE([crypt-xlator],
+ AC_HELP_STRING([--enable-crypt-xlator], [Build crypt encryption xlator]))
+
+if test "x$enable_crypt_xlator" = "xyes" -a "x$have_cmac_h" = "xno"; then
+ echo "Encryption xlator requires OpenSSL with cmac.h"
+ exit 1
+fi
+
+BUILD_CRYPT_XLATOR=no
+if test "x$enable_crypt_xlator" != "xno" -a "x$have_cmac_h" = "xyes"; then
+ BUILD_CRYPT_XLATOR=yes
+ AC_DEFINE(HAVE_CRYPT_XLATOR, 1, [enable building crypt encryption xlator])
+fi
+
+AM_CONDITIONAL([ENABLE_CRYPT_XLATOR], [test x$BUILD_CRYPT_XLATOR = xyes])
+
AC_SUBST(FUSE_CLIENT_SUBDIR)
# end FUSE section
# FUSERMOUNT section
AC_ARG_ENABLE([fusermount],
- AC_HELP_STRING([--enable-fusermount],
- [Build fusermount]))
+ AC_HELP_STRING([--disable-fusermount],
+ [Use system's fusermount]))
-BUILD_FUSERMOUNT="no"
-if test "x$enable_fusermount" = "xyes"; then
- FUSERMOUNT_SUBDIR="contrib/fuse-util"
- BUILD_FUSERMOUNT="yes"
+BUILD_FUSERMOUNT="yes"
+if test "x$enable_fusermount" = "xno"; then
+ BUILD_FUSERMOUNT="no"
+else
AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ FUSERMOUNT_SUBDIR="contrib/fuse-util"
fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
+# QEMU_BLOCK section
+
+AC_ARG_ENABLE([qemu-block],
+ AC_HELP_STRING([--enable-qemu-block],
+ [Build QEMU Block formats translator]))
+
+if test "x$enable_qemu_block" != "xno"; then
+ PKG_CHECK_MODULES([GLIB], [glib-2.0],
+ [HAVE_GLIB_2="yes"],
+ [HAVE_GLIB_2="no"])
+fi
+
+if test "x$enable_qemu_block" = "xyes" -a "x$HAVE_GLIB_2" = "xno"; then
+    echo "QEMU Block formats translator requires libglib-2.0, but it is missing."
+ exit 1
+fi
+
+BUILD_QEMU_BLOCK=no
+if test "x${enable_qemu_block}" != "xno" -a "x${HAVE_GLIB_2}" = "xyes"; then
+ BUILD_QEMU_BLOCK=yes
+ AC_DEFINE(HAVE_QEMU_BLOCK, 1, [define if libglib-2.0 library found and QEMU
+ Block translator enabled])
+fi
+
+AM_CONDITIONAL([ENABLE_QEMU_BLOCK], [test x$BUILD_QEMU_BLOCK = xyes])
+
+# end QEMU_BLOCK section
# EPOLL section
AC_ARG_ENABLE([epoll],
- AC_HELP_STRING([--disable-epoll],
- [Use poll instead of epoll.]))
+ AC_HELP_STRING([--disable-epoll],
+ [Use poll instead of epoll.]))
BUILD_EPOLL=no
if test "x$enable_epoll" != "xno"; then
AC_CHECK_HEADERS([sys/epoll.h],
[BUILD_EPOLL=yes],
- [BUILD_EPOLL=no])
+ [BUILD_EPOLL=no])
fi
# end EPOLL section
# IBVERBS section
AC_ARG_ENABLE([ibverbs],
- AC_HELP_STRING([--disable-ibverbs],
- [Do not build the ibverbs transport]))
+ AC_HELP_STRING([--disable-ibverbs],
+ [Do not build the ibverbs transport]))
if test "x$enable_ibverbs" != "xno"; then
AC_CHECK_LIB([ibverbs],
[ibv_get_device_list],
- [HAVE_LIBIBVERBS="yes"],
- [HAVE_LIBIBVERBS="no"])
+ [HAVE_LIBIBVERBS="yes"],
+ [HAVE_LIBIBVERBS="no"])
+ AC_CHECK_LIB([rdmacm], [rdma_create_id], [HAVE_RDMACM="yes"], [HAVE_RDMACM="no"])
fi
-if test "x$enable_ibverbs" = "xyes" -a "x$HAVE_LIBIBVERBS" = "xno"; then
- echo "ibverbs requested but not found."
- exit 1
+if test "x$enable_ibverbs" = "xyes"; then
+ if test "x$HAVE_LIBIBVERBS" = "xno"; then
+ echo "ibverbs-transport requested, but libibverbs is not present."
+ exit 1
+ fi
+
+ if test "x$HAVE_RDMACM" = "xno"; then
+ echo "ibverbs-transport requested, but librdmacm is not present."
+ exit 1
+ fi
fi
BUILD_RDMA=no
BUILD_IBVERBS=no
-if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes"; then
+if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes" -a "x$HAVE_RDMACM" = "xyes"; then
IBVERBS_SUBDIR=ib-verbs
BUILD_IBVERBS=yes
RDMA_SUBDIR=rdma
@@ -299,8 +465,8 @@ AC_SUBST(RDMA_SUBDIR)
# SYNCDAEMON section
AC_ARG_ENABLE([georeplication],
- AC_HELP_STRING([--disable-georeplication],
- [Do not install georeplication components]))
+ AC_HELP_STRING([--disable-georeplication],
+ [Do not install georeplication components]))
BUILD_SYNCDAEMON=no
case $host_os in
@@ -312,12 +478,12 @@ case $host_os in
;;
*)
#disabling geo replication for non-linux platforms
- enable_georeplication=no
+ enable_georeplication=no
;;
esac
SYNCDAEMON_COMPILE=0
if test "x$enable_georeplication" != "xno"; then
- SYNCDAEMON_SUBDIR=utils
+ SYNCDAEMON_SUBDIR=geo-replication
SYNCDAEMON_COMPILE=1
BUILD_SYNCDAEMON="yes"
@@ -341,15 +507,67 @@ AC_SUBST(SYNCDAEMON_COMPILE)
AC_SUBST(SYNCDAEMON_SUBDIR)
# end SYNCDAEMON section
-#check if libxml is present if so enable HAVE_LIB_XML
-echo -n "checking if libxml2 is present... "
+# CDC xlator - check if libz is present if so enable HAVE_LIB_Z
+echo -n "checking if libz is present... "
-PKG_CHECK_MODULES([LIBXML2], [libxml-2.0 >= 2.6.19],
- [echo "yes (features requiring libxml2 enabled)" AC_DEFINE(HAVE_LIB_XML, 1, [define if libxml2 is present])],
+PKG_CHECK_MODULES([ZLIB], [zlib >= 1.2.0],
+ [echo "yes (features requiring zlib enabled)" AC_DEFINE(HAVE_LIB_Z, 1, [define if zlib is present])],
[echo "no"] )
-AC_SUBST(LIBXML2_CFLAGS)
-AC_SUBST(LIBXML2_LIBS)
+AC_SUBST(ZLIB_CFLAGS)
+AC_SUBST(ZLIB_LIBS)
+# end CDC xlator secion
+
+# check for systemtap/dtrace
+BUILD_SYSTEMTAP=no
+AC_MSG_CHECKING([whether to include systemtap tracing support])
+AC_ARG_ENABLE([systemtap],
+ [AS_HELP_STRING([--enable-systemtap],
+ [Enable inclusion of systemtap trace support])],
+ [ENABLE_SYSTEMTAP="${enableval}"], [ENABLE_SYSTEMTAP="def"])
+
+AM_CONDITIONAL([ENABLE_SYSTEMTAP], [test "x${ENABLE_SYSTEMTAP}" = "xyes"])
+AC_MSG_RESULT(${ENABLE_SYSTEMTAP})
+
+if test "x${ENABLE_SYSTEMTAP}" != "xno"; then
+ AC_CHECK_PROG(DTRACE, dtrace, "yes", "no")
+ AC_CHECK_HEADER([sys/sdt.h], [SDT_H_FOUND="yes"],
+ [SDT_H_FOUND="no"])
+fi
+
+if test "x${ENABLE_SYSTEMTAP}" = "xyes"; then
+ if test "x${DTRACE}" = "xno"; then
+ AC_MSG_ERROR([dtrace not found])
+  elif test "x${SDT_H_FOUND}" = "xno"; then
+ AC_MSG_ERROR([systemtap support needs sys/sdt.h header])
+ fi
+fi
+
+if test "x${DTRACE}" = "xyes" -a "x${SDT_H_FOUND}" = "xyes"; then
+ AC_MSG_CHECKING([x"${DTRACE}"xy"${SDT_H_FOUND}"y])
+ AC_DEFINE([HAVE_SYSTEMTAP], [1], [Define to 1 if using probes.])
+ BUILD_SYSTEMTAP=yes
+fi
+# end of systemtap/dtrace
+
+# xml-output
+AC_ARG_ENABLE([xml-output],
+ AC_HELP_STRING([--disable-xml-output],
+ [Disable the xml output]))
+BUILD_XML_OUTPUT="yes"
+if test "x$enable_xml_output" != "xno"; then
+ #check if libxml is present if so enable HAVE_LIB_XML
+ m4_ifdef([AM_PATH_XML2],[AM_PATH_XML2([2.6.19])], [no_xml=yes])
+ if test "x${no_xml}" = "x"; then
+ AC_DEFINE([HAVE_LIB_XML], [1], [Define to 1 if using libxml2.])
+ else
+        AC_MSG_WARN([libxml2 devel libraries not found, disabling XML support])
+ BUILD_XML_OUTPUT="no"
+ fi
+else
+ BUILD_XML_OUTPUT="no"
+fi
+# end of xml-output
dnl FreeBSD > 5 has execinfo as a Ported library for giving a workaround
dnl solution to GCC backtrace functionality
@@ -374,17 +592,26 @@ dnl Linux, Solaris, Cygwin
AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec])
dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
+case $host_os in
+ *netbsd*)
+ CFLAGS+=" -D_INCOMPLETE_XOPEN_C063"
+ ;;
+esac
AC_CHECK_FUNC([linkat], [have_linkat=yes])
if test "x${have_linkat}" = "xyes"; then
AC_DEFINE(HAVE_LINKAT, 1, [define if found linkat])
fi
AC_SUBST(HAVE_LINKAT)
+dnl check for Monotonic clock
+AC_CHECK_FUNC([clock_gettime], [has_monotonic_clock=yes], AC_CHECK_LIB([rt], [clock_gettime], , AC_MSG_WARN([System doesn't have monotonic clock using contrib])))
+
dnl Check for argp
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
AC_CONFIG_SUBDIRS(argp-standalone)
+
BUILD_ARGP_STANDALONE=no
-if test "x${ac_cv_header_argp_h}" = "xno"; then
+if test "x${ac_cv_header_argp_h}" = "xno"; then
BUILD_ARGP_STANDALONE=yes
ARGP_STANDALONE_CPPFLAGS='-I${top_srcdir}/argp-standalone'
ARGP_STANDALONE_LDADD='${top_builddir}/argp-standalone/libargp.a'
@@ -405,7 +632,18 @@ if test "x${have_fdatasync}" = "xyes"; then
AC_DEFINE(HAVE_FDATASYNC, 1, [define if fdatasync exists])
fi
-# Check the distribution where you are compiling glusterfs on
+AC_CHECK_FUNC([fallocate], [have_fallocate=yes])
+if test "x${have_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_FALLOCATE, 1, [define if fallocate exists])
+fi
+
+AC_CHECK_FUNC([posix_fallocate], [have_posix_fallocate=yes])
+if test "x${have_posix_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
+fi
+
+
+# Check the distribution where you are compiling glusterfs on
GF_DISTRIBUTION=
AC_CHECK_FILE([/etc/debian_version])
@@ -427,56 +665,115 @@ AC_SUBST(GF_DISTRIBUTION)
GF_HOST_OS=""
GF_LDFLAGS="-rdynamic"
+# check for gcc -Werror=format-security
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Wformat -Werror=format-security"
+AC_MSG_CHECKING([whether $CC accepts -Werror=format-security])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_format_security=yes], [cc_werror_format_security=no])
+echo $cc_werror_format_security
+if test "x$cc_werror_format_security" = "xno"; then
+ CFLAGS="$saved_CFLAGS"
+else
+ CFLAGS="$saved_CFLAGS $CFLAGS"
+fi
+
+# check for gcc -Werror=implicit-function-declaration
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Werror=implicit-function-declaration"
+AC_MSG_CHECKING([whether $CC accepts -Werror=implicit-function-declaration])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_implicit=yes], [cc_werror_implicit=no])
+echo $cc_werror_implicit
+if test "x$cc_werror_implicit" = "xno"; then
+ CFLAGS="$saved_CFLAGS"
+else
+ CFLAGS="$saved_CFLAGS $CFLAGS"
+fi
+
case $host_os in
linux*)
- dnl GF_LINUX_HOST_OS=1
GF_HOST_OS="GF_LINUX_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
+ ;;
solaris*)
GF_HOST_OS="GF_SOLARIS_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
- GF_LDFLAGS=""
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_GLUSTERFS_LDFLAGS="-lnsl -lresolv -lsocket"
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
+ GF_LDFLAGS=""
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_GLUSTERFS_LIBS="-lnsl -lresolv -lsocket"
BUILD_FUSE_CLIENT=no
FUSE_CLIENT_SUBDIR=""
- ;;
+ ;;
*netbsd*)
- GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS}"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- GF_FUSE_LDADD="-liconv -lperfuse"
- BUILD_FUSE_CLIENT=no
- ;;
+ GF_HOST_OS="GF_BSD_HOST_OS"
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_INCOMPLETE_XOPEN_C063"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_GLUSTERFS_LIBS="-lexecinfo"
+ fi
+ GF_FUSE_LDADD="-lperfuse"
+ BUILD_FUSE_CLIENT=yes
+ LEXLIB=""
+ ;;
*bsd*)
GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- BUILD_FUSE_CLIENT=no
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_GLUSTERFS_LIBS="-lexecinfo"
+ fi
+ BUILD_FUSE_CLIENT=no
+ ;;
darwin*)
GF_HOST_OS="GF_DARWIN_HOST_OS"
- LIBTOOL=glibtool
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
- GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
- ;;
+ LIBTOOL=glibtool
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
+ ;;
esac
+# enable debug section
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+
+BUILD_DEBUG=no
+if test "x$enable_debug" = "xyes"; then
+ BUILD_DEBUG=yes
+ CFLAGS=`echo $CFLAGS | sed -e s/O2/O0/`
+else
+ BUILD_DEBUG=no
+fi
+AC_SUBST(CFLAGS)
+# end enable debug section
+
+# syslog section
+AC_ARG_ENABLE([syslog],
+ AC_HELP_STRING([--disable-syslog],
+ [Disable syslog for logging]))
+
+USE_SYSLOG="yes"
+if test "x$enable_syslog" != "xno"; then
+ AC_DEFINE(GF_USE_SYSLOG, 1, [Use syslog for logging])
+else
+ USE_SYSLOG="no"
+fi
+AM_CONDITIONAL([ENABLE_SYSLOG], [test x$USE_SYSLOG = xyes])
+#end syslog section
+
BUILD_READLINE=no
AC_CHECK_LIB([readline -lcurses],[readline],[RLLIBS="-lreadline -lcurses"])
AC_CHECK_LIB([readline -ltermcap],[readline],[RLLIBS="-lreadline -ltermcap"])
@@ -487,8 +784,66 @@ if test "x$RLLIBS" != "x"; then
BUILD_READLINE=yes
fi
+BUILD_LIBAIO=no
+AC_CHECK_LIB([aio],[io_setup],[LIBAIO="-laio"])
+
+if test "x$LIBAIO" != "x"; then
+ AC_DEFINE(HAVE_LIBAIO, 1, [libaio based POSIX enabled])
+ BUILD_LIBAIO=yes
+fi
+
+# glupy section
+BUILD_GLUPY=no
+have_python2=no
+have_Python_h=no
+
+AM_PATH_PYTHON()
+if echo $PYTHON_VERSION | grep ^2; then
+ have_python2=yes
+fi
+AC_CHECK_HEADERS([python$PYTHON_VERSION/Python.h],[have_Python_h=yes],[])
+AC_ARG_ENABLE([glupy],
+ AS_HELP_STRING([--enable-glupy],
+ [build glupy]))
+case x$enable_glupy in
+ xyes)
+ if test "x$have_python2" = "xyes" -a "x$have_Python_h" = "xyes"; then
+ BUILD_GLUPY=yes
+ else
+ AC_MSG_ERROR([glupy requires python-devel/python-dev package and python2.x])
+ fi
+ ;;
+ xno)
+ ;;
+ *)
+ if test "x$have_python2" = "xyes" -a "x$have_Python_h" = "xyes"; then
+ BUILD_GLUPY=yes
+ else
+ AC_MSG_WARN([
+ ---------------------------------------------------------------------------------
+ cannot build glupy. python 2.x and python-devel/python-dev package are required.
+ ---------------------------------------------------------------------------------])
+ fi
+ ;;
+esac
+
+if test "x$BUILD_GLUPY" = "xyes"; then
+ BUILD_PYTHON_INC=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_inc()"`
+ BUILD_PYTHON_LIB=python$PYTHON_VERSION
+ GLUPY_SUBDIR=glupy
+ GLUPY_SUBDIR_MAKEFILE=xlators/features/glupy/Makefile
+ GLUPY_SUBDIR_SRC_MAKEFILE=xlators/features/glupy/src/Makefile
+ echo "building glupy with -isystem $BUILD_PYTHON_INC -l $BUILD_PYTHON_LIB"
+ AC_SUBST(BUILD_PYTHON_INC)
+ AC_SUBST(BUILD_PYTHON_LIB)
+ AC_SUBST(GLUPY_SUBDIR)
+ AC_SUBST(GLUPY_SUBDIR_MAKEFILE)
+ AC_SUBST(GLUPY_SUBDIR_SRC_MAKEFILE)
+fi
+# end glupy section
+
AC_SUBST(GF_HOST_OS)
-AC_SUBST(GF_GLUSTERFS_LDFLAGS)
+AC_SUBST([GF_GLUSTERFS_LIBS])
AC_SUBST(GF_GLUSTERFS_CFLAGS)
AC_SUBST(GF_CFLAGS)
AC_SUBST(GF_LDFLAGS)
@@ -496,27 +851,41 @@ AC_SUBST(GF_LDADD)
AC_SUBST(GF_FUSE_LDADD)
AC_SUBST(GF_FUSE_CFLAGS)
AC_SUBST(RLLIBS)
+AC_SUBST(LIBAIO)
AC_SUBST(AM_MAKEFLAGS)
AC_SUBST(AM_LIBTOOLFLAGS)
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
-INCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
-AC_SUBST(INCLUDES)
+GF_CPPDEFINES='-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)'
+GF_CPPINCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
+GF_CPPFLAGS="$GF_CPPDEFINES $GF_CPPINCLUDES"
+AC_SUBST([GF_CPPFLAGS])
+
+AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
-AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
+AM_CONDITIONAL([GF_INSTALL_VAR_LIB_GLUSTERD], test ! -d ${localstatedir}/lib/glusterd && test -d ${sysconfdir}/glusterd )
AC_OUTPUT
echo
echo "GlusterFS configure summary"
echo "==========================="
-echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
-echo "epoll IO multiplex : $BUILD_EPOLL"
-echo "argp-standalone : $BUILD_ARGP_STANDALONE"
-echo "fusermount : $BUILD_FUSERMOUNT"
-echo "readline : $BUILD_READLINE"
-echo "georeplication : $BUILD_SYNCDAEMON"
+echo "FUSE client : $BUILD_FUSE_CLIENT"
+echo "Infiniband verbs : $BUILD_IBVERBS"
+echo "epoll IO multiplex : $BUILD_EPOLL"
+echo "argp-standalone : $BUILD_ARGP_STANDALONE"
+echo "fusermount : $BUILD_FUSERMOUNT"
+echo "readline : $BUILD_READLINE"
+echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
+echo "Enable Debug : $BUILD_DEBUG"
+echo "systemtap : $BUILD_SYSTEMTAP"
+echo "Block Device xlator : $BUILD_BD_XLATOR"
+echo "glupy : $BUILD_GLUPY"
+echo "Use syslog : $USE_SYSLOG"
+echo "XML output : $BUILD_XML_OUTPUT"
+echo "QEMU Block formats : $BUILD_QEMU_BLOCK"
+echo "Encryption xlator : $BUILD_CRYPT_XLATOR"
echo
diff --git a/contrib/aclocal/mkdirp.m4 b/contrib/aclocal/mkdirp.m4
new file mode 100644
index 000000000..d2f7edd5c
--- /dev/null
+++ b/contrib/aclocal/mkdirp.m4
@@ -0,0 +1,146 @@
+# Excerpt from autoconf/autoconf/programs.m4
+# This file is part of Autoconf. -*- Autoconf -*-
+# Checking for programs.
+
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
+
+# This file is part of Autoconf. This program is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the Autoconf Configure Script Exception,
+# version 3.0, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License
+# and a copy of the Autoconf Configure Script Exception along with
+# this program; see the files COPYINGv3 and COPYING.EXCEPTION
+# respectively. If not, see <http://www.gnu.org/licenses/>.
+
+# Written by David MacKenzie, with help from
+# Franc,ois Pinard, Karl Berry, Richard Pixley, Ian Lance Taylor,
+# Roland McGrath, Noah Friedman, david d zuhn, and many others.
+
+# AC_PROG_MKDIR_P
+# ---------------
+# Check whether `mkdir -p' is known to be thread-safe, and fall back to
+# install-sh -d otherwise.
+#
+# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories
+# created by `make install' are always world readable, even if the
+# installer happens to have an overly restrictive umask (e.g. 077).
+# This was a mistake. There are at least two reasons why we must not
+# use `-m 0755':
+# - it causes special bits like SGID to be ignored,
+# - it may be too restrictive (some setups expect 775 directories).
+#
+# Do not use -m 0755 and let people choose whatever they expect by
+# setting umask.
+#
+# We cannot accept any implementation of `mkdir' that recognizes `-p'.
+# Some implementations (such as Solaris 8's) are vulnerable to race conditions:
+# if a parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c'
+# concurrently, both version can detect that a/ is missing, but only
+# one can create it and the other will error out. Consequently we
+# restrict ourselves to known race-free implementations.
+#
+# Automake used to define mkdir_p as `mkdir -p .', in order to
+# allow $(mkdir_p) to be used without argument. As in
+# $(mkdir_p) $(somedir)
+# where $(somedir) is conditionally defined. However we don't do
+# that for MKDIR_P.
+# 1. before we restricted the check to GNU mkdir, `mkdir -p .' was
+# reported to fail in read-only directories. The system where this
+# happened has been forgotten.
+# 2. in practice we call $(MKDIR_P) on directories such as
+# $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# and we don't want to create $(DESTDIR) if $(somedir) is empty.
+# To support the latter case, we have to write
+# test -z "$(somedir)" || $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# so $(MKDIR_P) always has an argument.
+# We will have better chances of detecting a missing test if
+# $(MKDIR_P) complains about missing arguments.
+# 3. $(MKDIR_P) is named after `mkdir -p' and we don't expect this
+# to accept no argument.
+# 4. having something like `mkdir .' in the output is unsightly.
+#
+# On NextStep and OpenStep, the `mkdir' command does not
+# recognize any option. It will interpret all options as
+# directories to create.
+AN_MAKEVAR([MKDIR_P], [AC_PROG_MKDIR_P])
+AC_DEFUN_ONCE([AC_PROG_MKDIR_P],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+
+AC_MSG_CHECKING([for a thread-safe mkdir -p])
+if test -z "$MKDIR_P"; then
+ AC_CACHE_VAL([ac_cv_path_mkdir],
+ [_AS_PATH_WALK([$PATH$PATH_SEPARATOR/opt/sfw/bin],
+ [for ac_prog in mkdir gmkdir; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ AS_EXECUTABLE_P(["$as_dir/$ac_prog$ac_exec_ext"]) || continue
+ case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+ 'mkdir (GNU coreutils) '* | \
+ 'mkdir (coreutils) '* | \
+ 'mkdir (fileutils) '4.1*)
+ ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+ break 3;;
+ esac
+ done
+ done])])
+ test -d ./--version && rmdir ./--version
+ if test "${ac_cv_path_mkdir+set}" = set; then
+ MKDIR_P="$ac_cv_path_mkdir -p"
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for MKDIR_P within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ MKDIR_P="$ac_install_sh -d"
+ fi
+fi
+dnl status.m4 does special magic for MKDIR_P instead of AC_SUBST,
+dnl to get relative names right. However, also AC_SUBST here so
+dnl that Automake versions before 1.10 will pick it up (they do not
+dnl trace AC_SUBST_TRACE).
+dnl FIXME: Remove this once we drop support for Automake < 1.10.
+AC_SUBST([MKDIR_P])dnl
+AC_MSG_RESULT([$MKDIR_P])
+])# AC_PROG_MKDIR_P
+
+
+# From automake/m4/mkdirp.m4
+## -*- Autoconf -*-
+# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_MKDIR_P
+# ---------------
+# Check for `mkdir -p'.
+AC_DEFUN([AM_PROG_MKDIR_P],
+[
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
+dnl while keeping a definition of mkdir_p for backward compatibility.
+dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
+dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
+dnl Makefile.ins that do not define MKDIR_P, so we do our own
+dnl adjustment using top_builddir (which is defined more often than
+dnl MKDIR_P).
+AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
+case $mkdir_p in
+ [[\\/$]]* | ?:[[\\/]]*) ;;
+ */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+])
diff --git a/contrib/aclocal/python.m4 b/contrib/aclocal/python.m4
new file mode 100644
index 000000000..a39a90090
--- /dev/null
+++ b/contrib/aclocal/python.m4
@@ -0,0 +1,209 @@
+## ------------------------ -*- Autoconf -*-
+## Python file handling
+## From Andrew Dalke
+## Updated by James Henstridge
+## ------------------------
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# ---------------------------------------------------------------------------
+# Adds support for distributing Python modules and packages. To
+# install modules, copy them to $(pythondir), using the python_PYTHON
+# automake variable. To install a package with the same name as the
+# automake package, install to $(pkgpythondir), or use the
+# pkgpython_PYTHON automake variable.
+#
+# The variables $(pyexecdir) and $(pkgpyexecdir) are provided as
+# locations to install python extension modules (shared libraries).
+# Another macro is required to find the appropriate flags to compile
+# extension modules.
+#
+# If your package is configured with a different prefix to python,
+# users will have to add the install directory to the PYTHONPATH
+# environment variable, or create a .pth file (see the python
+# documentation for details).
+#
+# If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will
+# cause an error if the version of python installed on the system
+# doesn't meet the requirement. MINIMUM-VERSION should consist of
+# numbers and dots only.
+AC_DEFUN([AM_PATH_PYTHON],
+ [
+ dnl Find a Python interpreter. Python versions prior to 2.0 are not
+ dnl supported. (2.0 was released on October 16, 2000).
+ m4_define_default([_AM_PYTHON_INTERPRETER_LIST],
+ [python python2 python3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 dnl
+python2.1 python2.0])
+
+ m4_if([$1],[],[
+ dnl No version check is needed.
+ # Find any Python interpreter.
+ if test -z "$PYTHON"; then
+ AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :)
+ fi
+ am_display_PYTHON=python
+ ], [
+ dnl A version check is needed.
+ if test -n "$PYTHON"; then
+ # If the user set $PYTHON, use it and don't search something else.
+ AC_MSG_CHECKING([whether $PYTHON version >= $1])
+ AM_PYTHON_CHECK_VERSION([$PYTHON], [$1],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_ERROR(too old)])
+ am_display_PYTHON=$PYTHON
+ else
+ # Otherwise, try each interpreter until we find one that satisfies
+ # VERSION.
+ AC_CACHE_CHECK([for a Python interpreter with version >= $1],
+ [am_cv_pathless_PYTHON],[
+ for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do
+ test "$am_cv_pathless_PYTHON" = none && break
+ AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break])
+ done])
+ # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON.
+ if test "$am_cv_pathless_PYTHON" = none; then
+ PYTHON=:
+ else
+ AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON])
+ fi
+ am_display_PYTHON=$am_cv_pathless_PYTHON
+ fi
+ ])
+
+ if test "$PYTHON" = :; then
+ dnl Run any user-specified action, or abort.
+ m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])])
+ else
+
+ dnl Query Python for its version number. Getting [:3] seems to be
+ dnl the best way to do this; it's what "site.py" does in the standard
+ dnl library.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version],
+ [am_cv_python_version=`$PYTHON -c "import sys; sys.stdout.write(sys.version[[:3]])"`])
+ AC_SUBST([PYTHON_VERSION], [$am_cv_python_version])
+
+ dnl Use the values of $prefix and $exec_prefix for the corresponding
+ dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made
+ dnl distinct variables so they can be overridden if need be. However,
+ dnl general consensus is that you shouldn't need this ability.
+
+ AC_SUBST([PYTHON_PREFIX], ['${prefix}'])
+ AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}'])
+
+ dnl At times (like when building shared libraries) you may want
+ dnl to know which OS platform Python thinks this is.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform],
+ [am_cv_python_platform=`$PYTHON -c "import sys; sys.stdout.write(sys.platform)"`])
+ AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform])
+
+
+ dnl Set up 4 directories:
+
+ dnl pythondir -- where to install python scripts. This is the
+ dnl site-packages directory, not the python standard library
+ dnl directory like in previous automake betas. This behavior
+ dnl is more consistent with lispdir.m4 for example.
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON script directory],
+ [am_cv_python_pythondir],
+ [if test "x$prefix" = xNONE
+ then
+ am_py_prefix=$ac_default_prefix
+ else
+ am_py_prefix=$prefix
+ fi
+ am_cv_python_pythondir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(0,0,prefix='$am_py_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pythondir in
+ $am_py_prefix*)
+ am__strip_prefix=`echo "$am_py_prefix" | sed 's|.|.|g'`
+ am_cv_python_pythondir=`echo "$am_cv_python_pythondir" | sed "s,^$am__strip_prefix,$PYTHON_PREFIX,"`
+ ;;
+ *)
+ case $am_py_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pythondir=$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pythondir], [$am_cv_python_pythondir])
+
+ dnl pkgpythondir -- $PACKAGE directory under pythondir. Was
+ dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is
+ dnl more consistent with the rest of automake.
+
+ AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE])
+
+ dnl pyexecdir -- directory for installing python extension modules
+ dnl (shared libraries)
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON extension module directory],
+ [am_cv_python_pyexecdir],
+ [if test "x$exec_prefix" = xNONE
+ then
+ am_py_exec_prefix=$am_py_prefix
+ else
+ am_py_exec_prefix=$exec_prefix
+ fi
+ am_cv_python_pyexecdir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(1,0,prefix='$am_py_exec_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pyexecdir in
+ $am_py_exec_prefix*)
+ am__strip_prefix=`echo "$am_py_exec_prefix" | sed 's|.|.|g'`
+ am_cv_python_pyexecdir=`echo "$am_cv_python_pyexecdir" | sed "s,^$am__strip_prefix,$PYTHON_EXEC_PREFIX,"`
+ ;;
+ *)
+ case $am_py_exec_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pyexecdir=$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir])
+
+ dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE)
+
+ AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE])
+
+ dnl Run any user-specified action.
+ $2
+ fi
+
+])
+
+
+# AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# ---------------------------------------------------------------------------
+# Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION.
+# Run ACTION-IF-FALSE otherwise.
+# This test uses sys.hexversion instead of the string equivalent (first
+# word of sys.version), in order to cope with versions such as 2.2c1.
+# This supports Python 2.0 or higher. (2.0 was released on October 16, 2000).
+AC_DEFUN([AM_PYTHON_CHECK_VERSION],
+ [prog="import sys
+# split strings by '.' and convert to numeric. Append some zeros
+# because we need at least 4 digits for the hex conversion.
+# map returns an iterator in Python 3.0 and a list in 2.x
+minver = list(map(int, '$2'.split('.'))) + [[0, 0, 0]]
+minverhex = 0
+# xrange is not present in Python 3.0 and range returns an iterator
+for i in list(range(0, 4)): minverhex = (minverhex << 8) + minver[[i]]
+sys.exit(sys.hexversion < minverhex)"
+ AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])])
diff --git a/contrib/fuse-include/fuse-mount.h b/contrib/fuse-include/fuse-mount.h
index 9f83faf02..9358ac810 100644
--- a/contrib/fuse-include/fuse-mount.h
+++ b/contrib/fuse-include/fuse-mount.h
@@ -8,5 +8,6 @@
*/
void gf_fuse_unmount (const char *mountpoint, int fd);
-int gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
- pid_t *mtab_pid);
+int gf_fuse_mount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
+ pid_t *mtab_pid, int status_fd);
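
gf_fuse_mount() gains two parameters here: the mount flags to apply and a status_fd on which the mount helper can report back. A minimal sketch of a caller adapted to the new signature; the option string and the use of 0 and -1 as "no extra flags" and "no status pipe" are assumptions made for illustration, not something the patch defines:

    /* Hypothetical caller of the signature declared above. */
    static int
    mount_volume (const char *mountpoint)
    {
            pid_t mtab_pid = 0;

            return gf_fuse_mount (mountpoint, "glusterfs",
                                  0 /* mountflags */, "allow_other",
                                  &mtab_pid, -1 /* status_fd */);
    }
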
diff --git a/contrib/fuse-include/fuse_kernel.h b/contrib/fuse-include/fuse_kernel.h
index 9ae25d6f9..60bb2f9f7 100644
--- a/contrib/fuse-include/fuse_kernel.h
+++ b/contrib/fuse-include/fuse_kernel.h
@@ -60,23 +60,75 @@
* 7.13
* - make max number of background requests and congestion threshold
* tunables
+ *
+ * 7.14
+ * - add splice support to fuse device
+ *
+ * 7.15
+ * - add store notify
+ * - add retrieve notify
+ *
+ * 7.16
+ * - add BATCH_FORGET request
+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ * - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ * - add FUSE_IOCTL_DIR flag
+ * - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ * - add FUSE_FALLOCATE
+ *
+ * 7.20
+ * - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ * - add FUSE_READDIRPLUS
+ * - send the requested events in POLL request
+ *
+ * 7.22
+ * - add FUSE_ASYNC_DIO
*/
#ifndef _LINUX_FUSE_H
#define _LINUX_FUSE_H
-#include <sys/types.h>
-#define __u64 uint64_t
-#define __s64 int64_t
-#define __u32 uint32_t
-#define __s32 int32_t
-#define __u16 uint16_t
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
/** Version number of this interface */
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 13
+#define FUSE_KERNEL_MINOR_VERSION 22
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
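
The negotiation rules in the comment above reduce to a simple choice: when the major versions match, both sides talk with the smaller of the two minor versions; otherwise userspace answers with its own major and waits for a fresh INIT. A minimal sketch, assuming kern_major and kern_minor were read from the kernel's INIT request:

    static unsigned int
    negotiated_minor (unsigned int kern_major, unsigned int kern_minor)
    {
            if (kern_major != FUSE_KERNEL_VERSION)
                    return 0;   /* reply with our major, expect a new INIT */

            /* majors match: use the smaller minor for communication */
            return (kern_minor < FUSE_KERNEL_MINOR_VERSION)
                    ? kern_minor : FUSE_KERNEL_MINOR_VERSION;
    }
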
@@ -85,42 +137,42 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
- __u32 blksize;
- __u32 padding;
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t rdev;
+ uint32_t blksize;
+ uint32_t padding;
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ uint64_t blocks;
+ uint64_t bfree;
+ uint64_t bavail;
+ uint64_t files;
+ uint64_t ffree;
+ uint32_t bsize;
+ uint32_t namelen;
+ uint32_t frsize;
+ uint32_t padding;
+ uint32_t spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ uint64_t start;
+ uint64_t end;
+ uint32_t type;
+ uint32_t pid; /* tgid */
};
/**
@@ -151,8 +203,22 @@ struct fuse_file_lock {
/**
* INIT request/reply flags
*
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
* FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
* FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -161,6 +227,15 @@ struct fuse_file_lock {
#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
#define FUSE_DONT_MASK (1 << 6)
+#define FUSE_SPLICE_WRITE (1 << 7)
+#define FUSE_SPLICE_MOVE (1 << 8)
+#define FUSE_SPLICE_READ (1 << 9)
+#define FUSE_FLOCK_LOCKS (1 << 10)
+#define FUSE_HAS_IOCTL_DIR (1 << 11)
+#define FUSE_AUTO_INVAL_DATA (1 << 12)
+#define FUSE_DO_READDIRPLUS (1 << 13)
+#define FUSE_READDIRPLUS_AUTO (1 << 14)
+#define FUSE_ASYNC_DIO (1 << 15)
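
Each INIT flag above is a capability bit: the kernel advertises what it can do in its INIT request and the filesystem replies with the subset it actually intends to use. A hedged sketch of that masking step; the particular bits chosen are only an example:

    /* capable: flags offered by the kernel in its INIT request.
     * Returns the bits this (hypothetical) filesystem replies with. */
    static uint32_t
    init_reply_flags (uint32_t capable)
    {
            uint32_t want = FUSE_ASYNC_READ | FUSE_BIG_WRITES |
                            FUSE_AUTO_INVAL_DATA;

            return want & capable;   /* never claim a bit the kernel did not offer */
    }
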
/**
* CUSE INIT request/reply flags
@@ -173,6 +248,7 @@ struct fuse_file_lock {
* Release flags
*/
#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
/**
* Getattr flags
@@ -204,12 +280,16 @@ struct fuse_file_lock {
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_32BIT (1 << 3)
+#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_MAX_IOV 256
@@ -259,6 +339,10 @@ enum fuse_opcode {
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -268,6 +352,9 @@ enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
FUSE_NOTIFY_INVAL_INODE = 2,
FUSE_NOTIFY_INVAL_ENTRY = 3,
+ FUSE_NOTIFY_STORE = 4,
+ FUSE_NOTIFY_RETRIEVE = 5,
+ FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_CODE_MAX,
};
@@ -277,133 +364,143 @@ enum fuse_notify_code {
#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
+ uint64_t nodeid; /* Inode ID */
+ uint64_t generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ uint64_t entry_valid; /* Cache timeout for the name */
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t entry_valid_nsec;
+ uint32_t attr_valid_nsec;
struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ uint64_t nlookup;
+};
+
+struct fuse_forget_one {
+ uint64_t nodeid;
+ uint64_t nlookup;
+};
+
+struct fuse_batch_forget_in {
+ uint32_t count;
+ uint32_t dummy;
};
struct fuse_getattr_in {
- __u32 getattr_flags;
- __u32 dummy;
- __u64 fh;
+ uint32_t getattr_flags;
+ uint32_t dummy;
+ uint64_t fh;
};
#define FUSE_COMPAT_ATTR_OUT_SIZE 96
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t dummy;
struct fuse_attr attr;
};
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
- __u32 umask;
- __u32 padding;
+ uint32_t mode;
+ uint32_t rdev;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 umask;
+ uint32_t mode;
+ uint32_t umask;
};
struct fuse_rename_in {
- __u64 newdir;
+ uint64_t newdir;
};
struct fuse_link_in {
- __u64 oldnodeid;
+ uint64_t oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 lock_owner;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
+ uint32_t valid;
+ uint32_t padding;
+ uint64_t fh;
+ uint64_t size;
+ uint64_t lock_owner;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t unused2;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t unused3;
+ uint32_t mode;
+ uint32_t unused4;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t unused5;
};
struct fuse_open_in {
- __u32 flags;
- __u32 unused;
+ uint32_t flags;
+ uint32_t unused;
};
struct fuse_create_in {
- __u32 flags;
- __u32 mode;
- __u32 umask;
- __u32 padding;
+ uint32_t flags;
+ uint32_t mode;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t open_flags;
+ uint32_t padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t release_flags;
+ uint64_t lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t unused;
+ uint32_t padding;
+ uint64_t lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 read_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t read_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t write_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
@@ -413,32 +510,32 @@ struct fuse_statfs_out {
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t fsync_flags;
+ uint32_t padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
+ uint32_t size;
+ uint32_t flags;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
+ uint64_t fh;
+ uint64_t owner;
struct fuse_file_lock lk;
- __u32 lk_flags;
- __u32 padding;
+ uint32_t lk_flags;
+ uint32_t padding;
};
struct fuse_lk_out {
@@ -446,134 +543,190 @@ struct fuse_lk_out {
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ uint32_t mask;
+ uint32_t padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
};
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u16 max_background;
- __u16 congestion_threshold;
- __u32 max_write;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+ uint16_t max_background;
+ uint16_t congestion_threshold;
+ uint32_t max_write;
};
#define CUSE_INIT_INFO_MAX 4096
struct cuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
};
struct cuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
- __u32 max_read;
- __u32 max_write;
- __u32 dev_major; /* chardev major */
- __u32 dev_minor; /* chardev minor */
- __u32 spare[10];
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+ uint32_t max_read;
+ uint32_t max_write;
+ uint32_t dev_major; /* chardev major */
+ uint32_t dev_minor; /* chardev minor */
+ uint32_t spare[10];
};
struct fuse_interrupt_in {
- __u64 unique;
+ uint64_t unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ uint64_t block;
+ uint32_t blocksize;
+ uint32_t padding;
};
struct fuse_bmap_out {
- __u64 block;
+ uint64_t block;
};
struct fuse_ioctl_in {
- __u64 fh;
- __u32 flags;
- __u32 cmd;
- __u64 arg;
- __u32 in_size;
- __u32 out_size;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t cmd;
+ uint64_t arg;
+ uint32_t in_size;
+ uint32_t out_size;
+};
+
+struct fuse_ioctl_iovec {
+ uint64_t base;
+ uint64_t len;
};
struct fuse_ioctl_out {
- __s32 result;
- __u32 flags;
- __u32 in_iovs;
- __u32 out_iovs;
+ int32_t result;
+ uint32_t flags;
+ uint32_t in_iovs;
+ uint32_t out_iovs;
};
struct fuse_poll_in {
- __u64 fh;
- __u64 kh;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t kh;
+ uint32_t flags;
+ uint32_t events;
};
struct fuse_poll_out {
- __u32 revents;
- __u32 padding;
+ uint32_t revents;
+ uint32_t padding;
};
struct fuse_notify_poll_wakeup_out {
- __u64 kh;
+ uint64_t kh;
+};
+
+struct fuse_fallocate_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint64_t length;
+ uint32_t mode;
+ uint32_t padding;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ uint32_t len;
+ int32_t error;
+ uint64_t unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+struct fuse_direntplus {
+ struct fuse_entry_out entry_out;
+ struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+ offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
struct fuse_notify_inval_inode_out {
- __u64 ino;
- __s64 off;
- __s64 len;
+ uint64_t ino;
+ int64_t off;
+ int64_t len;
};
struct fuse_notify_inval_entry_out {
- __u64 parent;
- __u32 namelen;
- __u32 padding;
+ uint64_t parent;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_delete_out {
+ uint64_t parent;
+ uint64_t child;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_store_out {
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_notify_retrieve_out {
+ uint64_t notify_unique;
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+ uint64_t dummy1;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t dummy2;
+ uint64_t dummy3;
+ uint64_t dummy4;
};
#endif /* _LINUX_FUSE_H */
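
A READDIRPLUS record is a fuse_entry_out glued to an ordinary dirent, and its
on-the-wire length is rounded up to 8 bytes just like a plain dirent. A minimal
sketch of appending one record to a reply buffer with the macros above
(illustrative; the function and its arguments are assumptions, not part of the
patch):

        #include <string.h>
        #include <stdint.h>

        /* Append one READDIRPLUS record; returns the new fill level, or `used`
           unchanged if the record does not fit in `bufsize`.  `entry` is the
           already-filled lookup result for this name. */
        static size_t
        add_direntplus (char *buf, size_t used, size_t bufsize,
                        struct fuse_entry_out *entry, uint64_t ino,
                        uint64_t off, uint32_t type, const char *name)
        {
                struct fuse_direntplus *dp =
                        (struct fuse_direntplus *) (buf + used);
                size_t namelen = strlen (name);
                size_t entlen  = FUSE_NAME_OFFSET_DIRENTPLUS + namelen;
                size_t padded  = FUSE_DIRENT_ALIGN (entlen);

                if (used + padded > bufsize)
                        return used;                  /* no room, stop here */
                dp->entry_out = *entry;
                dp->dirent.ino = ino;
                dp->dirent.off = off;
                dp->dirent.namelen = namelen;
                dp->dirent.type = type;               /* DT_REG, DT_DIR, ... */
                memcpy (dp->dirent.name, name, namelen);
                memset (buf + used + entlen, 0, padded - entlen);  /* zero padding */
                return used + padded;
        }
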
diff --git a/contrib/fuse-util/mount_util.h b/contrib/fuse-include/mount_util.h
index f392f99f1..f392f99f1 100644
--- a/contrib/fuse-util/mount_util.h
+++ b/contrib/fuse-include/mount_util.h
diff --git a/contrib/fuse-lib/misc.c b/contrib/fuse-lib/misc.c
index 28a9284bf..0c41b1a19 100644
--- a/contrib/fuse-lib/misc.c
+++ b/contrib/fuse-lib/misc.c
@@ -50,5 +50,5 @@ convert_fuse_file_lock (struct fuse_file_lock *fl, struct gf_flock *flock,
else
flock->l_len = fl->end - fl->start + 1;
flock->l_pid = fl->pid;
- flock->l_owner = lk_owner;
+ set_lk_owner_from_uint64 (&flock->l_owner, lk_owner);
}
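
The hunk above sits in the helper that translates FUSE's [start, end] byte range
into the POSIX (l_start, l_len) form, where an end of OFFSET_MAX means "until
EOF" and POSIX spells that as l_len == 0. A standalone sketch of the same
conversion using only libc types (the OFFSET_MAX sentinel follows libfuse's
convention and is an assumption here):

        #include <fcntl.h>
        #include <unistd.h>
        #include <stdint.h>

        #define FUSE_LK_OFFSET_MAX 0x7fffffffffffffffLL

        static void
        fuse_range_to_flock (uint64_t start, uint64_t end, uint32_t type,
                             struct flock *fl)
        {
                fl->l_type   = type;          /* F_RDLCK / F_WRLCK / F_UNLCK */
                fl->l_whence = SEEK_SET;
                fl->l_start  = start;
                fl->l_len    = (end == FUSE_LK_OFFSET_MAX)
                               ? 0 : end - start + 1;
        }
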
diff --git a/contrib/fuse-lib/mount-common.c b/contrib/fuse-lib/mount-common.c
new file mode 100644
index 000000000..fd6cce44e
--- /dev/null
+++ b/contrib/fuse-lib/mount-common.c
@@ -0,0 +1,265 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#include "mount-gluster-compat.h"
+
+/*
+ * These functions (and gf_fuse_umount() in mount.c)
+ * were originally taken from libfuse as of commit 7960e99e
+ * (http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=commit;h=7960e99e)
+ * almost verbatim. What has been changed upon adoption:
+ * - style adopted to that of glusterfs
+ * - s/fprintf/gf_log/
+ * - s/free/FREE/, s/malloc/MALLOC/
+ * - there are some other minor things
+ *
+ * For changes that were made later and syncs with upstream,
+ * see the commit log and per-function comments.
+ */
+
+#ifndef __NetBSD__
+/* FUSE: cherry-picked bd99f9cf */
+static int
+mtab_needs_update (const char *mnt)
+{
+ int res;
+ struct stat stbuf;
+
+ /* If mtab is within new mount, don't touch it */
+ if (strncmp (mnt, _PATH_MOUNTED, strlen (mnt)) == 0 &&
+ _PATH_MOUNTED[strlen (mnt)] == '/')
+ return 0;
+
+ /*
+ * Skip mtab update if /etc/mtab:
+ *
+ * - doesn't exist,
+ * - is a symlink,
+ * - is on a read-only filesystem.
+ */
+ res = lstat (_PATH_MOUNTED, &stbuf);
+ if (res == -1) {
+ if (errno == ENOENT)
+ return 0;
+ } else {
+ uid_t ruid;
+ int err;
+
+ if (S_ISLNK (stbuf.st_mode))
+ return 0;
+
+ ruid = getuid ();
+ if (ruid != 0)
+ setreuid (0, -1);
+
+ res = access (_PATH_MOUNTED, W_OK);
+ err = (res == -1) ? errno : 0;
+ if (ruid != 0)
+ setreuid (ruid, -1);
+
+ if (err == EROFS)
+ return 0;
+ }
+
+ return 1;
+}
+#else /* __NetBSD__ */
+#define mtab_needs_update(x) 1
+#endif /* __NetBSD__ */
+
+/* FUSE: called add_mount_legacy(); R.I.P. as of cbd3a2a8 */
+int
+fuse_mnt_add_mount (const char *progname, const char *fsname,
+ const char *mnt, const char *type, const char *opts)
+{
+ int res;
+ int status;
+ sigset_t blockmask;
+ sigset_t oldmask;
+
+ if (!mtab_needs_update (mnt))
+ return 0;
+
+ sigemptyset (&blockmask);
+ sigaddset (&blockmask, SIGCHLD);
+ res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: sigprocmask: %s",
+ progname, strerror (errno));
+ return -1;
+ }
+
+ res = fork ();
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
+ goto out_restore;
+ }
+ if (res == 0) {
+ char templ[] = "/tmp/fusermountXXXXXX";
+ char *tmp;
+
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ setuid (geteuid ());
+
+ /*
+ * hide in a directory, where mount isn't able to resolve
+ * fsname as a valid path
+ */
+ tmp = mkdtemp (templ);
+ if (!tmp) {
+ GFFUSE_LOGERR ("%s: failed to create temporary directory",
+ progname);
+ exit (1);
+ }
+ if (chdir (tmp)) {
+ GFFUSE_LOGERR ("%s: failed to chdir to %s: %s",
+ progname, tmp, strerror (errno));
+ exit (1);
+ }
+ rmdir (tmp);
+ execl (_PATH_MOUNT, _PATH_MOUNT, "-i", "-f", "-t", type,
+ "-o", opts, fsname, mnt, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute %s: %s",
+ progname, _PATH_MOUNT, strerror (errno));
+ exit (1);
+ }
+
+ res = waitpid (res, &status, 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
+ res = (res != -1 && status == 0) ? 0 : -1;
+
+ out_restore:
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ return res;
+}
+
+char *
+fuse_mnt_resolve_path (const char *progname, const char *orig)
+{
+ char buf[PATH_MAX];
+ char *copy;
+ char *dst;
+ char *end;
+ char *lastcomp;
+ const char *toresolv;
+
+ if (!orig[0]) {
+ GFFUSE_LOGERR ("%s: invalid mountpoint '%s'", progname, orig);
+ return NULL;
+ }
+
+ copy = strdup (orig);
+ if (copy == NULL) {
+ GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ return NULL;
+ }
+
+ toresolv = copy;
+ lastcomp = NULL;
+ for (end = copy + strlen (copy) - 1; end > copy && *end == '/'; end --);
+ if (end[0] != '/') {
+ char *tmp;
+ end[1] = '\0';
+ tmp = strrchr (copy, '/');
+ if (tmp == NULL) {
+ lastcomp = copy;
+ toresolv = ".";
+ } else {
+ lastcomp = tmp + 1;
+ if (tmp == copy)
+ toresolv = "/";
+ }
+ if (strcmp (lastcomp, ".") == 0 || strcmp (lastcomp, "..") == 0) {
+ lastcomp = NULL;
+ toresolv = copy;
+ }
+ else if (tmp)
+ tmp[0] = '\0';
+ }
+ if (realpath (toresolv, buf) == NULL) {
+ GFFUSE_LOGERR ("%s: bad mount point %s: %s", progname, orig,
+ strerror (errno));
+ FREE (copy);
+ return NULL;
+ }
+ if (lastcomp == NULL)
+ dst = strdup (buf);
+ else {
+ dst = (char *) MALLOC (strlen (buf) + 1 + strlen (lastcomp) + 1);
+ if (dst) {
+ unsigned buflen = strlen (buf);
+ if (buflen && buf[buflen-1] == '/')
+ sprintf (dst, "%s%s", buf, lastcomp);
+ else
+ sprintf (dst, "%s/%s", buf, lastcomp);
+ }
+ }
+ FREE (copy);
+ if (dst == NULL)
+ GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ return dst;
+}
+
+/* FUSE: to support some changes that were reverted since
+ * then, it was split in two (fuse_mnt_umount() and
+ * exec_umount()); however the actual code is same as here
+ * since 0197ce40
+ */
+int
+fuse_mnt_umount (const char *progname, const char *abs_mnt,
+ const char *rel_mnt, int lazy)
+{
+ int res;
+ int status;
+ sigset_t blockmask;
+ sigset_t oldmask;
+
+ if (!mtab_needs_update (abs_mnt)) {
+ res = umount2 (rel_mnt, lazy ? 2 : 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: failed to unmount %s: %s",
+ progname, abs_mnt, strerror (errno));
+ return res;
+ }
+
+ sigemptyset (&blockmask);
+ sigaddset (&blockmask, SIGCHLD);
+ res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: sigprocmask: %s", progname,
+ strerror (errno));
+ return -1;
+ }
+
+ res = fork ();
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
+ goto out_restore;
+ }
+ if (res == 0) {
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ setuid (geteuid ());
+ execl ("/bin/umount", "/bin/umount", "-i", rel_mnt,
+ lazy ? "-l" : NULL, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute /bin/umount: %s",
+ progname, strerror (errno));
+ exit (1);
+ }
+ res = waitpid (res, &status, 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
+
+ if (status != 0)
+ res = -1;
+
+ out_restore:
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ return res;
+}
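
fuse_mnt_resolve_path() canonicalizes everything except the final path component
(so a mountpoint that does not exist yet still resolves, provided its parent
exists) and hands back a malloc'd string the caller must free. A short usage
sketch, not taken from the patch:

        /* canonicalize a user-supplied mountpoint before mounting */
        char *newmnt = fuse_mnt_resolve_path ("glusterfs-fuse", user_mnt);

        if (newmnt == NULL)
                return -1;              /* helper already logged the error */
        /* ... mount(2), fuse_mnt_add_mount (...), ... */
        FREE (newmnt);
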
diff --git a/contrib/fuse-lib/mount-gluster-compat.h b/contrib/fuse-lib/mount-gluster-compat.h
new file mode 100644
index 000000000..4fc20623b
--- /dev/null
+++ b/contrib/fuse-lib/mount-gluster-compat.h
@@ -0,0 +1,56 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <dirent.h>
+#include <signal.h>
+#ifndef __NetBSD__
+#include <mntent.h>
+#endif /* __NetBSD__ */
+#include <sys/stat.h>
+#include <sys/poll.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+
+#ifdef __NetBSD__
+#include <perfuse.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#define MS_RDONLY MNT_RDONLY
+#endif
+
+#ifdef linux
+#define _PATH_MOUNT "/bin/mount"
+#else /* NetBSD, MacOS X */
+#define _PATH_MOUNT "/sbin/mount"
+#endif
+
+#ifdef FUSE_UTIL
+#define MALLOC(size) malloc (size)
+#define FREE(ptr) free (ptr)
+#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
+#else /* FUSE_UTIL */
+#include "glusterfs.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#define GFFUSE_LOGERR(...) \
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
+#endif /* !FUSE_UTIL */
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index 85a366894..922d9e464 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -7,335 +7,140 @@
See the file COPYING.LIB.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stddef.h>
-#include <limits.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <dirent.h>
-#ifndef __NetBSD__
-#include <mntent.h>
-#endif /* __NetBSD__ */
-#include <sys/stat.h>
-#include <sys/poll.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/wait.h>
-#include <sys/mount.h>
-
-#ifdef __NetBSD__
-#include <perfuse.h>
-#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
-#endif
-
-#ifdef FUSE_UTIL
-#define MALLOC(size) malloc (size)
-#define FREE(ptr) free (ptr)
-#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
-#else /* FUSE_UTIL */
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "mount_util.h"
+#include "mount-gluster-compat.h"
#ifdef GF_FUSERMOUNT
#define FUSERMOUNT_PROG FUSERMOUNT_DIR "/fusermount-glusterfs"
#else
#define FUSERMOUNT_PROG "fusermount"
#endif
-#define FUSE_COMMFD_ENV "_FUSE_COMMFD"
+#define FUSE_DEVFD_ENV "_FUSE_DEVFD"
-#define GFFUSE_LOGERR(...) \
- gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
-#endif /* !FUSE_UTIL */
-
-/*
- * Functions below, until following note, were taken from libfuse
- * (http://git.gluster.com/?p=users/csaba/fuse.git;a=commit;h=b988bbf9)
- * almost verbatim. What has been changed:
- * - style adopted to that of glusterfs
- * - s/fprintf/gf_log/
- * - s/free/FREE/, s/malloc/MALLOC/
- * - there are some other minor things
- */
-#ifndef __NetBSD__
-static int
-mtab_needs_update (const char *mnt)
+/* FUSE: function is called fuse_kern_unmount() */
+void
+gf_fuse_unmount (const char *mountpoint, int fd)
{
int res;
- struct stat stbuf;
-
- /* If mtab is within new mount, don't touch it */
- if (strncmp (mnt, _PATH_MOUNTED, strlen (mnt)) == 0 &&
- _PATH_MOUNTED[strlen (mnt)] == '/')
- return 0;
-
- /*
- * Skip mtab update if /etc/mtab:
- *
- * - doesn't exist,
- * - is a symlink,
- * - is on a read-only filesystem.
- */
- res = lstat (_PATH_MOUNTED, &stbuf);
- if (res == -1) {
- if (errno == ENOENT)
- return 0;
- } else {
- if (S_ISLNK (stbuf.st_mode))
- return 0;
-
- res = access (_PATH_MOUNTED, W_OK);
- if (res == -1 && errno == EROFS)
- return 0;
- }
+ int pid;
- return 1;
-}
-#else /* __NetBSD__ */
-#define mtab_needs_update(x) 1
-#endif /* __NetBSD__ */
+ if (!mountpoint)
+ return;
-#ifndef FUSE_UTIL
-static
-#endif
-int
-fuse_mnt_add_mount (const char *progname, const char *fsname,
- const char *mnt, const char *type, const char *opts,
- pid_t *mtab_pid)
-{
- int res;
- int status;
- sigset_t blockmask;
- sigset_t oldmask;
-
- if (!mtab_needs_update (mnt))
- return 0;
-
- sigemptyset (&blockmask);
- sigaddset (&blockmask, SIGCHLD);
- res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
- if (res == -1) {
- GFFUSE_LOGERR ("%s: sigprocmask: %s",
- progname, strerror (errno));
- return -1;
- }
+ if (fd != -1) {
+ struct pollfd pfd;
- res = fork ();
- if (res == -1) {
- GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
- goto out_restore;
- }
- if (res == 0) {
- char templ[] = "/tmp/fusermountXXXXXX";
- char *tmp;
-
- if (!mtab_pid) {
- /* mtab update done async, just log if fails */
- res = fork ();
- if (res)
- exit (res == -1 ? 1 : 0);
- res = fork ();
- if (res) {
- if (res != -1) {
- if (!(res == waitpid (res, &status, 0)
- && status == 0))
- GFFUSE_LOGERR ("%s: /etc/mtab "
- "update failed",
- progname);
- }
- exit (0);
- }
- }
+ pfd.fd = fd;
+ pfd.events = 0;
+ res = poll (&pfd, 1, 0);
+ /* If file poll returns POLLERR on the device file descriptor,
+ then the filesystem is already unmounted */
+ if (res == 1 && (pfd.revents & POLLERR))
+ return;
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
-
- /*
- * hide in a directory, where mount isn't able to resolve
- * fsname as a valid path
- */
- tmp = mkdtemp (templ);
- if (!tmp) {
- GFFUSE_LOGERR ("%s: failed to create temporary directory",
- progname);
- exit (1);
- }
- if (chdir (tmp)) {
- GFFUSE_LOGERR ("%s: failed to chdir to %s: %s",
- progname, tmp, strerror (errno));
- exit (1);
- }
- rmdir (tmp);
- execl ("/bin/mount", "/bin/mount", "-i", "-f", "-t", type,
- "-o", opts, fsname, mnt, NULL);
- GFFUSE_LOGERR ("%s: failed to execute /bin/mount: %s",
- progname, strerror (errno));
- exit (1);
+ /* Need to close file descriptor, otherwise synchronous umount
+ would recurse into filesystem, and deadlock */
+ close (fd);
}
- if (mtab_pid) {
- *mtab_pid = res;
- res = 0;
- } else {
- if (!(res == waitpid (res, &status, 0) && status == 0))
- res = -1;
+
+ if (geteuid () == 0) {
+ fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
+ return;
}
- if (res == -1)
- GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
- out_restore:
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- return res;
-}
+ res = umount2 (mountpoint, 2);
+ if (res == 0)
+ return;
-#ifndef FUSE_UTIL
-static
-#endif
-char
-*fuse_mnt_resolve_path (const char *progname, const char *orig)
-{
- char buf[PATH_MAX];
- char *copy;
- char *dst;
- char *end;
- char *lastcomp;
- const char *toresolv;
-
- if (!orig[0]) {
- GFFUSE_LOGERR ("%s: invalid mountpoint '%s'", progname, orig);
- return NULL;
- }
+ pid = fork ();
+ if (pid == -1)
+ return;
- copy = strdup (orig);
- if (copy == NULL) {
- GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
- return NULL;
- }
+ if (pid == 0) {
+ const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z",
+ "--", mountpoint, NULL };
- toresolv = copy;
- lastcomp = NULL;
- for (end = copy + strlen (copy) - 1; end > copy && *end == '/'; end --);
- if (end[0] != '/') {
- char *tmp;
- end[1] = '\0';
- tmp = strrchr (copy, '/');
- if (tmp == NULL) {
- lastcomp = copy;
- toresolv = ".";
- } else {
- lastcomp = tmp + 1;
- if (tmp == copy)
- toresolv = "/";
- }
- if (strcmp (lastcomp, ".") == 0 || strcmp (lastcomp, "..") == 0) {
- lastcomp = NULL;
- toresolv = copy;
- }
- else if (tmp)
- tmp[0] = '\0';
- }
- if (realpath (toresolv, buf) == NULL) {
- GFFUSE_LOGERR ("%s: bad mount point %s: %s", progname, orig,
- strerror (errno));
- FREE (copy);
- return NULL;
- }
- if (lastcomp == NULL)
- dst = strdup (buf);
- else {
- dst = (char *) MALLOC (strlen (buf) + 1 + strlen (lastcomp) + 1);
- if (dst) {
- unsigned buflen = strlen (buf);
- if (buflen && buf[buflen-1] == '/')
- sprintf (dst, "%s%s", buf, lastcomp);
- else
- sprintf (dst, "%s/%s", buf, lastcomp);
- }
+ execvp (FUSERMOUNT_PROG, (char **)argv);
+ _exit (1);
}
- FREE (copy);
- if (dst == NULL)
- GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
- return dst;
+ waitpid (pid, NULL, 0);
}
-#ifndef FUSE_UTIL
-/* return value:
- * >= 0 => fd
- * -1 => error
- */
-static int
-receive_fd (int fd)
+
+/* gluster-specific routines */
+
+static char *
+escape (char *s)
{
- struct msghdr msg;
- struct iovec iov;
- char buf[1];
- int rv;
- size_t ccmsg[CMSG_SPACE (sizeof (int)) / sizeof (size_t)];
- struct cmsghdr *cmsg;
- int *recv_fd;
-
- iov.iov_base = buf;
- iov.iov_len = 1;
-
- msg.msg_name = 0;
- msg.msg_namelen = 0;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- /* old BSD implementations should use msg_accrights instead of
- * msg_control; the interface is different. */
- msg.msg_control = ccmsg;
- msg.msg_controllen = sizeof (ccmsg);
-
- while (((rv = recvmsg (fd, &msg, 0)) == -1) && errno == EINTR);
- if (rv == -1) {
- GFFUSE_LOGERR ("recvmsg failed: %s", strerror (errno));
- return -1;
- }
- if (!rv) {
- /* EOF */
- return -1;
+ size_t len = 0;
+ char *p = NULL;
+ char *q = NULL;
+ char *e = NULL;
+
+ for (p = s; *p; p++) {
+ if (*p == ',')
+ len++;
+ len++;
}
- cmsg = CMSG_FIRSTHDR (&msg);
- /*
- * simplify condition expression
- */
- if (cmsg->cmsg_type != SCM_RIGHTS) {
- GFFUSE_LOGERR ("got control message of unknown type %d",
- cmsg->cmsg_type);
- return -1;
+ e = CALLOC (1, len + 1);
+ if (!e)
+ return NULL;
+
+ for (p = s, q = e; *p; p++, q++) {
+ if (*p == ',') {
+ *q = '\\';
+ q++;
+ }
+ *q = *p;
}
- recv_fd = (int *) CMSG_DATA (cmsg);
- return (*recv_fd);
+ return e;
}
static int
-fuse_mount_fusermount (const char *mountpoint, const char *opts)
+fuse_mount_fusermount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
+ int fd)
{
- int fds[2], pid;
- int res;
- int rv;
+ int pid = -1;
+ int res = 0;
+ int ret = -1;
+ char *fm_mnt_params = NULL;
+ char *efsname = NULL;
+
+#ifndef GF_FUSERMOUNT
+ GFFUSE_LOGERR ("Mounting via helper utility "
+ "(unprivileged mounting) is supported "
+ "only if glusterfs is compiled with "
+ "--enable-fusermount");
+ return -1;
+#endif
+
+ efsname = escape (fsname);
+ if (!efsname) {
+ GFFUSE_LOGERR ("Out of memory");
- res = socketpair (PF_UNIX, SOCK_STREAM, 0, fds);
- if (res == -1) {
- GFFUSE_LOGERR ("socketpair() failed: %s", strerror (errno));
return -1;
}
+ ret = asprintf (&fm_mnt_params,
+ "%s%s,fsname=%s,nonempty,subtype=glusterfs",
+ (mountflags & MS_RDONLY) ? "ro," : "",
+ mnt_param, efsname);
+ FREE (efsname);
+ if (ret == -1) {
+ GFFUSE_LOGERR ("Out of memory");
+
+ goto out;
+ }
+ /* fork to exec fusermount */
pid = fork ();
if (pid == -1) {
GFFUSE_LOGERR ("fork() failed: %s", strerror (errno));
- close (fds[0]);
- close (fds[1]);
- return -1;
+ ret = -1;
+ goto out;
}
if (pid == 0) {
@@ -344,214 +149,37 @@ fuse_mount_fusermount (const char *mountpoint, const char *opts)
int a = 0;
argv[a++] = FUSERMOUNT_PROG;
- if (opts) {
- argv[a++] = "-o";
- argv[a++] = opts;
- }
+ argv[a++] = "-o";
+ argv[a++] = fm_mnt_params;
argv[a++] = "--";
argv[a++] = mountpoint;
argv[a++] = NULL;
- close (fds[1]);
- fcntl (fds[0], F_SETFD, 0);
- snprintf (env, sizeof (env), "%i", fds[0]);
- setenv (FUSE_COMMFD_ENV, env, 1);
+ snprintf (env, sizeof (env), "%i", fd);
+ setenv (FUSE_DEVFD_ENV, env, 1);
execvp (FUSERMOUNT_PROG, (char **)argv);
GFFUSE_LOGERR ("failed to exec fusermount: %s",
strerror (errno));
_exit (1);
}
- close (fds[0]);
- rv = receive_fd (fds[1]);
- close (fds[1]);
- waitpid (pid, NULL, 0); /* bury zombie */
-
- return rv;
-}
-#endif
-
-#ifndef FUSE_UTIL
-static
-#endif
-int
-fuse_mnt_umount (const char *progname, const char *abs_mnt,
- const char *rel_mnt, int lazy)
-{
- int res;
- int status;
- sigset_t blockmask;
- sigset_t oldmask;
-
- if (!mtab_needs_update (abs_mnt)) {
- res = umount2 (rel_mnt, lazy ? 2 : 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: failed to unmount %s: %s",
- progname, abs_mnt, strerror (errno));
- return res;
- }
-
- sigemptyset (&blockmask);
- sigaddset (&blockmask, SIGCHLD);
- res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
- if (res == -1) {
- GFFUSE_LOGERR ("%s: sigprocmask: %s", progname,
- strerror (errno));
- return -1;
- }
-
- res = fork ();
- if (res == -1) {
- GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
- goto out_restore;
- }
- if (res == 0) {
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
- execl ("/bin/umount", "/bin/umount", "-i", rel_mnt,
- lazy ? "-l" : NULL, NULL);
- GFFUSE_LOGERR ("%s: failed to execute /bin/umount: %s",
- progname, strerror (errno));
- exit (1);
- }
- res = waitpid (res, &status, 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
-
- if (status != 0)
- res = -1;
-
- out_restore:
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- return res;
-}
-
-#ifdef FUSE_UTIL
-int
-fuse_mnt_check_empty (const char *progname, const char *mnt,
- mode_t rootmode, off_t rootsize)
-{
- int isempty = 1;
-
- if (S_ISDIR (rootmode)) {
- struct dirent *ent;
- DIR *dp = opendir (mnt);
- if (dp == NULL) {
- fprintf (stderr,
- "%s: failed to open mountpoint for reading: %s\n",
- progname, strerror (errno));
- return -1;
- }
- while ((ent = readdir (dp)) != NULL) {
- if (strcmp (ent->d_name, ".") != 0 &&
- strcmp (ent->d_name, "..") != 0) {
- isempty = 0;
- break;
- }
- }
- closedir (dp);
- } else if (rootsize)
- isempty = 0;
-
- if (!isempty) {
- fprintf (stderr, "%s: mountpoint is not empty\n", progname);
- fprintf (stderr, "%s: if you are sure this is safe, "
- "use the 'nonempty' mount option\n", progname);
- return -1;
- }
- return 0;
-}
-
-int
-fuse_mnt_check_fuseblk (void)
-{
- char buf[256];
- FILE *f = fopen ("/proc/filesystems", "r");
- if (!f)
- return 1;
-
- while (fgets (buf, sizeof (buf), f))
- if (strstr (buf, "fuseblk\n")) {
- fclose (f);
- return 1;
- }
-
- fclose (f);
- return 0;
-}
-#endif
-
-#ifndef FUSE_UTIL
-void
-gf_fuse_unmount (const char *mountpoint, int fd)
-{
- int res;
- int pid;
-
- if (!mountpoint)
- return;
-
- if (fd != -1) {
- struct pollfd pfd;
-
- pfd.fd = fd;
- pfd.events = 0;
- res = poll (&pfd, 1, 0);
- /* If file poll returns POLLERR on the device file descriptor,
- then the filesystem is already unmounted */
- if (res == 1 && (pfd.revents & POLLERR))
- return;
-
- /* Need to close file descriptor, otherwise synchronous umount
- would recurse into filesystem, and deadlock */
- close (fd);
- }
-
- if (geteuid () == 0) {
- fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
- return;
- }
-
- res = umount2 (mountpoint, 2);
- if (res == 0)
- return;
-
- pid = fork ();
- if (pid == -1)
- return;
-
- if (pid == 0) {
- const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z",
- "--", mountpoint, NULL };
-
- execvp (FUSERMOUNT_PROG, (char **)argv);
- _exit (1);
- }
- waitpid (pid, NULL, 0);
+ ret = waitpid (pid, &res, 0);
+ ret = (ret == pid && res == 0) ? 0 : -1;
+ out:
+ FREE (fm_mnt_params);
+ return ret;
}
-#endif
-/*
- * Functions below are loosely modelled after similar functions of libfuse
- */
-
-#ifndef FUSE_UTIL
static int
-fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, pid_t *mtab_pid)
+fuse_mount_sys (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param, int fd)
{
- int fd = -1, ret = -1;
+ int ret = -1;
unsigned mounted = 0;
char *mnt_param_mnt = NULL;
char *fstype = "fuse.glusterfs";
char *source = fsname;
- fd = open ("/dev/fuse", O_RDWR);
- if (fd == -1) {
- GFFUSE_LOGERR ("cannot open /dev/fuse (%s)", strerror (errno));
-
- return -1;
- }
-
ret = asprintf (&mnt_param_mnt,
"%s,fd=%i,rootmode=%o,user_id=%i,group_id=%i",
mnt_param, fd, S_IFDIR, getuid (), getgid ());
@@ -560,7 +188,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, pid_t *mt
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
if (ret == -1 && errno == ENODEV) {
/* fs subtype support was added by 79c0b2df aka
@@ -573,7 +201,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, pid_t *mt
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
}
if (ret == -1)
@@ -581,8 +209,10 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, pid_t *mt
else
mounted = 1;
+#ifndef __NetBSD__
if (geteuid () == 0) {
char *newmnt = fuse_mnt_resolve_path ("fuse", mountpoint);
+ char *mnt_param_mtab = NULL;
if (!newmnt) {
ret = -1;
@@ -590,8 +220,17 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, pid_t *mt
goto out;
}
- ret = fuse_mnt_add_mount ("fuse", source, newmnt, fstype,
- mnt_param, mtab_pid);
+ ret = asprintf (&mnt_param_mtab, "%s%s",
+ mountflags & MS_RDONLY ? "ro," : "",
+ mnt_param);
+ if (ret == -1)
+ GFFUSE_LOGERR ("Out of memory");
+ else {
+ ret = fuse_mnt_add_mount ("fuse", source, newmnt,
+ fstype, mnt_param_mtab);
+ FREE (mnt_param_mtab);
+ }
+
FREE (newmnt);
if (ret == -1) {
GFFUSE_LOGERR ("failed to add mtab entry");
@@ -599,95 +238,76 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, pid_t *mt
goto out;
}
}
+#endif /* __NetBSD__ */
- out:
+out:
if (ret == -1) {
if (mounted)
umount2 (mountpoint, 2); /* lazy umount */
- close (fd);
- fd = -1;
}
FREE (mnt_param_mnt);
if (source != fsname)
FREE (source);
- return fd;
-}
-
-static char *
-escape (char *s)
-{
- size_t len = 0;
- char *p = NULL;
- char *q = NULL;
- char *e = NULL;
- for (p = s; *p; p++) {
- if (*p == ',')
- len++;
- len++;
- }
-
- e = CALLOC (1, len + 1);
- if (!e)
- return NULL;
-
- for (p = s, q = e; *p; p++, q++) {
- if (*p == ',') {
- *q = '\\';
- q++;
- }
- *q = *p;
- }
-
- return e;
+ return ret;
}
int
-gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
- pid_t *mtab_pid)
+gf_fuse_mount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
+ pid_t *mnt_pid, int status_fd)
{
- int fd = -1, rv = -1;
- char *fm_mnt_params = NULL, *p = NULL;
- char *efsname = NULL;
+ int fd = -1;
+ pid_t pid = -1;
+ int ret = -1;
- fd = fuse_mount_sys (mountpoint, fsname, mnt_param, mtab_pid);
+ fd = open ("/dev/fuse", O_RDWR);
if (fd == -1) {
- gf_log ("glusterfs-fuse", GF_LOG_INFO,
- "direct mount failed (%s), "
- "retry to mount via fusermount",
- strerror (errno));
-
- efsname = escape (fsname);
- if (!efsname) {
- GFFUSE_LOGERR ("Out of memory");
-
- return -1;
- }
- rv = asprintf (&fm_mnt_params,
- "%s,fsname=%s,nonempty,subtype=glusterfs",
- mnt_param, efsname);
- FREE (efsname);
- if (rv == -1) {
- GFFUSE_LOGERR ("Out of memory");
+ GFFUSE_LOGERR ("cannot open /dev/fuse (%s)",
+ strerror (errno));
+ return -1;
+ }
- return -1;
+ /* start mount agent */
+ pid = fork();
+ switch (pid) {
+ case 0:
+ /* hello it's mount agent */
+ if (!mnt_pid) {
+ /* daemonize mount agent, caller is
+ * not interested in waiting for it
+ */
+ pid = fork ();
+ if (pid)
+ exit (pid == -1 ? 1 : 0);
}
- fd = fuse_mount_fusermount (mountpoint, fm_mnt_params);
- if (fd == -1) {
- p = fm_mnt_params + strlen (fm_mnt_params);
- while (*--p != ',');
- *p = '\0';
+ ret = fuse_mount_sys (mountpoint, fsname, mountflags, mnt_param, fd);
+ if (ret == -1) {
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "direct mount failed (%s), "
+ "retry to mount via fusermount",
+ strerror (errno));
- fd = fuse_mount_fusermount (mountpoint, fm_mnt_params);
+ ret = fuse_mount_fusermount (mountpoint, fsname, mountflags,
+ mnt_param, fd);
}
- FREE (fm_mnt_params);
+ if (ret == -1)
+ GFFUSE_LOGERR ("mount of %s to %s (%s) failed",
+ fsname, mountpoint, mnt_param);
- if (fd == -1)
- GFFUSE_LOGERR ("mount failed");
+ if (status_fd >= 0)
+ (void)write (status_fd, &ret, sizeof (ret));
+ exit (!!ret);
+ /* bye mount agent */
+ case -1:
+ close (fd);
+ fd = -1;
}
+ if (mnt_pid)
+ *mnt_pid = pid;
+
return fd;
}
-#endif
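
After this rewrite gf_fuse_mount() opens /dev/fuse itself, forks a "mount agent"
to do the actual mounting, and reports the agent's result through status_fd. A
sketch of a caller driving the new interface (illustrative only; it assumes a
pipe is used for the status channel and that the mount flags shown are just an
example):

        #include <unistd.h>
        #include <sys/mount.h>

        static int
        start_fuse (const char *mountpoint, char *fsname, char *mnt_param)
        {
                int   pfd[2], fd, status = -1;
                pid_t mnt_pid = -1;

                if (pipe (pfd) == -1)
                        return -1;
                fd = gf_fuse_mount (mountpoint, fsname, MS_NOSUID | MS_NODEV,
                                    mnt_param, &mnt_pid, pfd[1]);
                close (pfd[1]);
                if (fd == -1) {
                        close (pfd[0]);
                        return -1;
                }
                if (read (pfd[0], &status, sizeof (status)) != sizeof (status) ||
                    status != 0) {
                        close (pfd[0]);
                        close (fd);     /* mount agent reported failure */
                        return -1;
                }
                close (pfd[0]);
                return fd;              /* serve FUSE requests on this fd */
        }
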
diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
index 42609a688..971d3d220 100644
--- a/contrib/fuse-util/Makefile.am
+++ b/contrib/fuse-util/Makefile.am
@@ -1,9 +1,11 @@
bin_PROGRAMS = fusermount-glusterfs
-fusermount_glusterfs_SOURCES = fusermount.c $(CONTRIBDIR)/fuse-lib/mount.c
-noinst_HEADERS = mount_util.h
+fusermount_glusterfs_SOURCES = fusermount.c mount_util.c $(CONTRIBDIR)/fuse-lib/mount-common.c
+noinst_HEADERS = $(CONTRIBDIR)/fuse-include/mount_util.h
-AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -DFUSE_UTIL $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -DFUSE_UTIL -I$(CONTRIBDIR)/fuse-include
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
install-exec-hook:
-chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
diff --git a/contrib/fuse-util/fusermount.c b/contrib/fuse-util/fusermount.c
index 39da9b6a0..0ff8d9039 100644
--- a/contrib/fuse-util/fusermount.c
+++ b/contrib/fuse-util/fusermount.c
@@ -19,6 +19,7 @@
#include <errno.h>
#include <fcntl.h>
#include <pwd.h>
+#include <limits.h>
#include <mntent.h>
#include <sys/wait.h>
#include <sys/stat.h>
@@ -28,6 +29,7 @@
#include <sys/utsname.h>
#include <sched.h>
+#define FUSE_DEVFD_ENV "_FUSE_DEVFD"
#define FUSE_COMMFD_ENV "_FUSE_COMMFD"
#define FUSE_DEV_OLD "/proc/fs/fuse/dev"
@@ -114,8 +116,16 @@ static int lock_umount(void)
static void unlock_umount(int mtablock)
{
- lockf(mtablock, F_ULOCK, 0);
- close(mtablock);
+ if (mtablock >= 0) {
+ int res;
+
+ res = lockf(mtablock, F_ULOCK, 0);
+ if (res < 0) {
+ fprintf(stderr, "%s: error releasing lock: %s\n",
+ progname, strerror(errno));
+ }
+ close(mtablock);
+ }
}
static int add_mount(const char *source, const char *mnt, const char *type,
@@ -238,7 +248,7 @@ static int check_is_mount_child(void *p)
}
count = 0;
- while ((entp = getmntent(fp)) != NULL)
+ while (getmntent(fp) != NULL)
count++;
endmntent(fp);
@@ -325,7 +335,7 @@ static int check_is_mount(const char *last, const char *mnt)
return 0;
}
-static int chdir_to_parent(char *copy, const char **lastp, int *currdir_fd)
+static int chdir_to_parent(char *copy, const char **lastp)
{
char *tmp;
const char *parent;
@@ -350,14 +360,6 @@ static int chdir_to_parent(char *copy, const char **lastp, int *currdir_fd)
parent = "/";
}
- *currdir_fd = open(".", O_RDONLY);
- if (*currdir_fd == -1) {
- fprintf(stderr,
- "%s: failed to open current directory: %s\n",
- progname, strerror(errno));
- return -1;
- }
-
res = chdir(parent);
if (res == -1) {
fprintf(stderr, "%s: failed to chdir to %s: %s\n",
@@ -382,7 +384,6 @@ static int chdir_to_parent(char *copy, const char **lastp, int *currdir_fd)
static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
{
- int currdir_fd = -1;
char *copy;
const char *last;
int res;
@@ -399,7 +400,7 @@ static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
return -1;
}
- res = chdir_to_parent(copy, &last, &currdir_fd);
+ res = chdir_to_parent(copy, &last);
if (res == -1)
goto out;
@@ -411,10 +412,6 @@ static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
out:
free(copy);
- if (currdir_fd != -1) {
- fchdir(currdir_fd);
- close(currdir_fd);
- }
return res;
}
@@ -508,13 +505,13 @@ static void read_conf(void)
int isnewline = 1;
while (fgets(line, sizeof(line), fp) != NULL) {
if (isnewline) {
- if (line[strlen(line)-1] == '\n') {
+ if (strlen(line) && line[strlen(line)-1] == '\n') {
strip_line(line);
parse_line(line, linenum);
} else {
isnewline = 0;
}
- } else if(line[strlen(line)-1] == '\n') {
+ } else if(strlen(line) && line[strlen(line)-1] == '\n') {
fprintf(stderr, "%s: reading %s: line %i too long\n", progname, FUSE_CONF, linenum);
isnewline = 1;
@@ -609,7 +606,7 @@ static int add_option(char **optsp, const char *opt, unsigned expand)
static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
{
int i;
- int l;
+ size_t l;
if (!(flags & MS_RDONLY) && add_option(mnt_optsp, "rw", 0) == -1)
return -1;
@@ -624,7 +621,7 @@ static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
return -1;
/* remove comma from end of opts*/
l = strlen(*mnt_optsp);
- if ((*mnt_optsp)[l-1] == ',')
+ if (l && (*mnt_optsp)[l-1] == ',')
(*mnt_optsp)[l-1] = '\0';
if (getuid() != 0) {
const char *user = get_user_name();
@@ -653,8 +650,7 @@ static int get_string_opt(const char *s, unsigned len, const char *opt,
unsigned opt_len = strlen(opt);
char *d;
- if (*val)
- free(*val);
+ free(*val);
*val = (char *) malloc(len - opt_len + 1);
if (!*val) {
fprintf(stderr, "%s: failed to allocate memory\n", progname);
@@ -823,15 +819,14 @@ static int do_mount(const char *mnt, char **typep, mode_t rootmode,
fprintf(stderr, "%s: mount failed: %s\n", progname,
strerror(errno_save));
goto err;
- } else {
- *sourcep = source;
- *typep = type;
- *mnt_optsp = mnt_opts;
}
+ *sourcep = source;
+ *typep = type;
+ *mnt_optsp = mnt_opts;
free(fsname);
free(optbuf);
- return res;
+ return 0;
err:
free(fsname);
@@ -874,8 +869,7 @@ static int check_version(const char *dev)
return 0;
}
-static int check_perm(const char **mntp, struct stat *stbuf, int *currdir_fd,
- int *mountpoint_fd)
+static int check_perm(const char **mntp, struct stat *stbuf, int *mountpoint_fd)
{
int res;
const char *mnt = *mntp;
@@ -893,13 +887,6 @@ static int check_perm(const char **mntp, struct stat *stbuf, int *currdir_fd,
return 0;
if (S_ISDIR(stbuf->st_mode)) {
- *currdir_fd = open(".", O_RDONLY);
- if (*currdir_fd == -1) {
- fprintf(stderr,
- "%s: failed to open current directory: %s\n",
- progname, strerror(errno));
- return -1;
- }
res = chdir(mnt);
if (res == -1) {
fprintf(stderr,
@@ -1016,8 +1003,36 @@ static int open_fuse_device(char **devp)
return -1;
}
+static int check_fuse_device(char *devfd, char **devp)
+{
+ int res;
+ char *devlink;
-static int mount_fuse(const char *mnt, const char *opts)
+ res = asprintf(&devlink, "/proc/self/fd/%s", devfd);
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to allocate memory\n", progname);
+ return -1;
+ }
+
+ *devp = (char *) calloc(1, PATH_MAX + 1);
+ if (!*devp) {
+ fprintf(stderr, "%s: failed to allocate memory\n", progname);
+ free(devlink);
+ return -1;
+ }
+
+ res = readlink (devlink, *devp, PATH_MAX);
+ free (devlink);
+ if (res == -1) {
+ fprintf(stderr, "%s: specified fuse fd is invalid\n",
+ progname);
+ return -1;
+ }
+
+ return atoi(devfd);
+}
+
+static int mount_fuse(const char *mnt, const char *opts, char *devfd)
{
int res;
int fd;
@@ -1027,10 +1042,9 @@ static int mount_fuse(const char *mnt, const char *opts)
char *source = NULL;
char *mnt_opts = NULL;
const char *real_mnt = mnt;
- int currdir_fd = -1;
int mountpoint_fd = -1;
- fd = open_fuse_device(&dev);
+ fd = devfd ? check_fuse_device(devfd, &dev) : open_fuse_device(&dev);
if (fd == -1)
return -1;
@@ -1041,15 +1055,13 @@ static int mount_fuse(const char *mnt, const char *opts)
int mount_count = count_fuse_fs();
if (mount_count >= mount_max) {
fprintf(stderr, "%s: too many FUSE filesystems mounted; mount_max=N can be set in /etc/fuse.conf\n", progname);
- close(fd);
- return -1;
+ goto fail_close_fd;
}
}
res = check_version(dev);
if (res != -1) {
- res = check_perm(&real_mnt, &stbuf, &currdir_fd,
- &mountpoint_fd);
+ res = check_perm(&real_mnt, &stbuf, &mountpoint_fd);
restore_privs();
if (res != -1)
res = do_mount(real_mnt, &type, stbuf.st_mode & S_IFMT,
@@ -1058,33 +1070,38 @@ static int mount_fuse(const char *mnt, const char *opts)
} else
restore_privs();
- if (currdir_fd != -1) {
- fchdir(currdir_fd);
- close(currdir_fd);
- }
if (mountpoint_fd != -1)
close(mountpoint_fd);
+ if (res == -1)
+ goto fail_close_fd;
+
+ res = chdir("/");
if (res == -1) {
- close(fd);
- return -1;
+ fprintf(stderr, "%s: failed to chdir to '/'\n", progname);
+ goto fail_close_fd;
}
if (geteuid() == 0) {
res = add_mount(source, mnt, type, mnt_opts);
if (res == -1) {
- umount2(mnt, 2); /* lazy umount */
- close(fd);
- return -1;
+ /* Can't clean up mount in a non-racy way */
+ goto fail_close_fd;
}
}
+out_free:
free(source);
free(type);
free(mnt_opts);
free(dev);
return fd;
+
+fail_close_fd:
+ close(fd);
+ fd = -1;
+ goto out_free;
}
static int send_fd(int sock_fd, int fd)
@@ -1154,6 +1171,7 @@ int main(int argc, char *argv[])
static int unmount = 0;
static int lazy = 0;
static int quiet = 0;
+ char *devfd;
char *commfd;
int cfd;
const char *opts = "";
@@ -1222,6 +1240,13 @@ int main(int argc, char *argv[])
drop_privs();
mnt = fuse_mnt_resolve_path(progname, origmnt);
+ if (mnt != NULL) {
+ res = chdir("/");
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to chdir to '/'\n", progname);
+ exit(1);
+ }
+ }
restore_privs();
if (mnt == NULL)
exit(1);
@@ -1242,21 +1267,26 @@ int main(int argc, char *argv[])
return 0;
}
- commfd = getenv(FUSE_COMMFD_ENV);
- if (commfd == NULL) {
- fprintf(stderr, "%s: old style mounting not supported\n",
- progname);
- exit(1);
+ devfd = getenv(FUSE_DEVFD_ENV);
+ if (devfd == NULL) {
+ commfd = getenv(FUSE_COMMFD_ENV);
+ if (commfd == NULL) {
+ fprintf(stderr, "%s: old style mounting not supported\n",
+ progname);
+ exit(1);
+ }
}
- fd = mount_fuse(mnt, opts);
+ fd = mount_fuse(mnt, opts, devfd);
if (fd == -1)
exit(1);
- cfd = atoi(commfd);
- res = send_fd(cfd, fd);
- if (res == -1)
- exit(1);
+ if (devfd == NULL) {
+ cfd = atoi(commfd);
+ res = send_fd(cfd, fd);
+ if (res == -1)
+ exit(1);
+ }
return 0;
}
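
The new _FUSE_DEVFD path hands fusermount an already-open /dev/fuse descriptor
instead of passing one back over a socket pair (_FUSE_COMMFD), and
check_fuse_device() sanity-checks it by resolving the /proc/self/fd symlink. A
self-contained sketch of that validation idea (not the patch's exact code):

        #include <stdio.h>
        #include <stdlib.h>
        #include <unistd.h>
        #include <limits.h>

        /* Return the numeric fd if the inherited descriptor still points at a
           real file (readlink on /proc/self/fd/N succeeds), or -1 otherwise. */
        static int
        inherited_fd_ok (const char *devfd_str)
        {
                char link[64];
                char target[PATH_MAX + 1] = "";

                snprintf (link, sizeof (link), "/proc/self/fd/%s", devfd_str);
                if (readlink (link, target, PATH_MAX) == -1)
                        return -1;
                return atoi (devfd_str);
        }
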
diff --git a/contrib/fuse-util/mount_util.c b/contrib/fuse-util/mount_util.c
new file mode 100644
index 000000000..911b84445
--- /dev/null
+++ b/contrib/fuse-util/mount_util.c
@@ -0,0 +1,64 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#include <dirent.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int fuse_mnt_check_empty(const char *progname, const char *mnt,
+ mode_t rootmode, off_t rootsize)
+{
+ int isempty = 1;
+
+ if (S_ISDIR(rootmode)) {
+ struct dirent *ent;
+ DIR *dp = opendir(mnt);
+ if (dp == NULL) {
+ fprintf(stderr,
+ "%s: failed to open mountpoint for reading: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ while ((ent = readdir(dp)) != NULL) {
+ if (strcmp(ent->d_name, ".") != 0 &&
+ strcmp(ent->d_name, "..") != 0) {
+ isempty = 0;
+ break;
+ }
+ }
+ closedir(dp);
+ } else if (rootsize)
+ isempty = 0;
+
+ if (!isempty) {
+ fprintf(stderr, "%s: mountpoint is not empty\n", progname);
+ fprintf(stderr, "%s: if you are sure this is safe, use the 'nonempty' mount option\n", progname);
+ return -1;
+ }
+ return 0;
+}
+
+int fuse_mnt_check_fuseblk(void)
+{
+ char buf[256];
+ FILE *f = fopen("/proc/filesystems", "r");
+ if (!f)
+ return 1;
+
+ while (fgets(buf, sizeof(buf), f))
+ if (strstr(buf, "fuseblk\n")) {
+ fclose(f);
+ return 1;
+ }
+
+ fclose(f);
+ return 0;
+}
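
fuse_mnt_check_fuseblk() scans /proc/filesystems for the "fuseblk" type and
deliberately returns 1 when that file cannot be opened, so a mount attempt is
still made in the absence of procfs. A tiny usage sketch (illustrative, not from
the patch):

        if (strcmp (type, "fuseblk") == 0 && !fuse_mnt_check_fuseblk ()) {
                fprintf (stderr, "%s: 'fuseblk' is not supported by this kernel\n",
                         progname);
                return -1;
        }
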
diff --git a/contrib/libgen/basename_r.c b/contrib/libgen/basename_r.c
new file mode 100644
index 000000000..2c3a87afe
--- /dev/null
+++ b/contrib/libgen/basename_r.c
@@ -0,0 +1,40 @@
+/*
+ * borrowed from glibc-2.12.1/string/basename.c
+ * Modified to return "." for NULL or "", as required for SUSv2.
+ */
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_BASENAME
+
+/* Return the name-within-directory of a file name.
+ Copyright (C) 1996,97,98,2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+char *
+basename_r (filename)
+ const char *filename;
+{
+ char *p;
+
+ if ((filename == NULL) || (*filename == '\0'))
+ return ".";
+
+ p = strrchr (filename, '/');
+ return p ? p + 1 : (char *) filename;
+}
+#endif /* THREAD_UNSAFE_BASENAME */
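
Unlike POSIX basename(3), this bundled basename_r never modifies its argument and
maps NULL or "" to ".", so it is safe to call on shared, read-only strings. A
quick usage sketch (illustrative; needs <stdio.h>):

        printf ("%s\n", basename_r ("/export/brick1/file.txt"));  /* "file.txt" */
        printf ("%s\n", basename_r (""));                         /* "." */
        printf ("%s\n", basename_r (NULL));                       /* "." */
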
diff --git a/contrib/libgen/dirname_r.c b/contrib/libgen/dirname_r.c
new file mode 100644
index 000000000..131cbcf2a
--- /dev/null
+++ b/contrib/libgen/dirname_r.c
@@ -0,0 +1,243 @@
+/*
+ * Borrowed from glibc-2.12.1/string/memrchr.c
+ * Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ * Removed code for long bigger than 32 bytes, renamed __ptr_t as void *
+ * changed reg_char type to char.
+ */
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_DIRNAME
+
+/* memrchr -- find the last occurrence of a byte in a memory block
+ Copyright (C) 1991, 93, 96, 97, 99, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+void *
+__memrchr (s, c_in, n)
+ const void * s;
+ int c_in;
+ size_t n;
+{
+ const unsigned char *char_ptr;
+ const unsigned long int *longword_ptr;
+ unsigned long int longword, magic_bits, charmask;
+ unsigned char c;
+
+ c = (unsigned char) c_in;
+
+ /* Handle the last few characters by reading one character at a time.
+ Do this until CHAR_PTR is aligned on a longword boundary. */
+ for (char_ptr = (const unsigned char *) s + n;
+ n > 0 && ((unsigned long int) char_ptr
+ & (sizeof (longword) - 1)) != 0;
+ --n)
+ if (*--char_ptr == c)
+ return (void *) char_ptr;
+
+ /* All these elucidatory comments refer to 4-byte longwords,
+ but the theory applies equally well to 8-byte longwords. */
+
+ longword_ptr = (const unsigned long int *) char_ptr;
+
+ /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
+ the "holes." Note that there is a hole just to the left of
+ each byte, with an extra at the end:
+
+ bits: 01111110 11111110 11111110 11111111
+ bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+ The 1-bits make sure that carries propagate to the next 0-bit.
+ The 0-bits provide holes for carries to fall into. */
+
+ if (sizeof (longword) != 4 && sizeof (longword) != 8)
+ abort ();
+
+ magic_bits = 0x7efefeff;
+
+ /* Set up a longword, each of whose bytes is C. */
+ charmask = c | (c << 8);
+ charmask |= charmask << 16;
+
+ /* Instead of the traditional loop which tests each character,
+ we will test a longword at a time. The tricky part is testing
+ if *any of the four* bytes in the longword in question are zero. */
+ while (n >= sizeof (longword))
+ {
+ /* We tentatively exit the loop if adding MAGIC_BITS to
+ LONGWORD fails to change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-30 is set, there will be a carry
+ into bit 31, so all of the hole bits will be changed.
+
+ The one misfire occurs when bits 24-30 are clear and bit
+ 31 is set; in this case, the hole at bit 31 is not
+ changed. If we had access to the processor carry flag,
+ we could close this loophole by putting the fourth hole
+ at bit 32!
+
+ So it ignores everything except 128's, when they're aligned
+ properly.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ longword = *--longword_ptr ^ charmask;
+
+ /* Add MAGIC_BITS to LONGWORD. */
+ if ((((longword + magic_bits)
+
+ /* Set those bits that were unchanged by the addition. */
+ ^ ~longword)
+
+ /* Look at only the hole bits. If any of the hole bits
+ are unchanged, most likely one of the bytes was a
+ zero. */
+ & ~magic_bits) != 0)
+ {
+ /* Which of the bytes was C? If none of them were, it was
+ a misfire; continue the search. */
+
+ const unsigned char *cp = (const unsigned char *) longword_ptr;
+
+ if (cp[3] == c)
+ return (void *) &cp[3];
+ if (cp[2] == c)
+ return (void *) &cp[2];
+ if (cp[1] == c)
+ return (void *) &cp[1];
+ if (cp[0] == c)
+ return (void *) cp;
+ }
+
+ n -= sizeof (longword);
+ }
+
+ char_ptr = (const unsigned char *) longword_ptr;
+
+ while (n-- > 0)
+ {
+ if (*--char_ptr == c)
+ return (void *) char_ptr;
+ }
+
+ return 0;
+}
+
+/*
+ * Borrowed from glibc-2.12.1/misc/dirname.c
+ */
+
+/* dirname - return directory part of PATH.
+ Copyright (C) 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+char *
+dirname_r (char *path)
+{
+ static const char dot[] = ".";
+ char *last_slash;
+
+ /* Find last '/'. */
+ last_slash = path != NULL ? strrchr (path, '/') : NULL;
+
+ if (last_slash != NULL && last_slash != path && last_slash[1] == '\0')
+ {
+ /* Determine whether all remaining characters are slashes. */
+ char *runp;
+
+ for (runp = last_slash; runp != path; --runp)
+ if (runp[-1] != '/')
+ break;
+
+ /* The '/' is the last character, we have to look further. */
+ if (runp != path)
+ last_slash = __memrchr (path, '/', runp - path);
+ }
+
+ if (last_slash != NULL)
+ {
+ /* Determine whether all remaining characters are slashes. */
+ char *runp;
+
+ for (runp = last_slash; runp != path; --runp)
+ if (runp[-1] != '/')
+ break;
+
+ /* Terminate the path. */
+ if (runp == path)
+ {
+ /* The last slash is the first character in the string. We have to
+ return "/". As a special case we have to return "//" if there
+ are exactly two slashes at the beginning of the string. See
+ XBD 4.10 Path Name Resolution for more information. */
+ if (last_slash == path + 1)
+ ++last_slash;
+ else
+ last_slash = path + 1;
+ }
+ else
+ last_slash = runp;
+
+ last_slash[0] = '\0';
+ }
+ else
+    /* This assignment is ill-designed, but the XPG specs require returning a
+       string containing "." whenever no directory part is found, and so a
+       static, constant string is required.  */
+ path = (char *) dot;
+
+ return path;
+}
+#endif /* THREAD_UNSAFE_DIRNAME */
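
A small usage sketch of dirname_r() follows, assuming THREAD_UNSAFE_DIRNAME is defined and the program is linked with the file above; the main() and sample paths are hypothetical. Note that, like dirname(3), the helper may modify the buffer passed in and may return a pointer to a static string.

#include <stdio.h>

char *dirname_r (char *path);   /* defined above */

int main (void)
{
  char a[] = "/usr/lib/libglusterfs.so";
  char b[] = "filename-only";
  char c[] = "//";

  printf ("%s\n", dirname_r (a));   /* "/usr/lib" (a is modified in place) */
  printf ("%s\n", dirname_r (b));   /* "." (static string; do not modify) */
  printf ("%s\n", dirname_r (c));   /* "//", per XBD 4.10 */
  return 0;
}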
diff --git a/contrib/md5/md5.c b/contrib/md5/md5.c
deleted file mode 100644
index 5f0d0d157..000000000
--- a/contrib/md5/md5.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * RFC 1321 compliant MD5 implementation
- *
- * Copyright (C) 2001-2003 Christophe Devine
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, visit the http://fsf.org website.
- */
-
-
-#include <inttypes.h>
-#include <string.h>
-
-#include "md5.h"
-
-void md5_begin(md_context *ctx)
-{
- ctx->A = 0x67452301;
- ctx->B = 0xEFCDAB89;
- ctx->C = 0x98BADCFE;
- ctx->D = 0x10325476;
-
- ctx->totalN = ctx->totalN2 = 0;
-}
-
-static void md5_process(md_context *ctx, const uint8_t data[CSUM_CHUNK])
-{
- uint32_t X[16], A, B, C, D;
-
- A = ctx->A;
- B = ctx->B;
- C = ctx->C;
- D = ctx->D;
-
- X[0] = IVAL(data, 0);
- X[1] = IVAL(data, 4);
- X[2] = IVAL(data, 8);
- X[3] = IVAL(data, 12);
- X[4] = IVAL(data, 16);
- X[5] = IVAL(data, 20);
- X[6] = IVAL(data, 24);
- X[7] = IVAL(data, 28);
- X[8] = IVAL(data, 32);
- X[9] = IVAL(data, 36);
- X[10] = IVAL(data, 40);
- X[11] = IVAL(data, 44);
- X[12] = IVAL(data, 48);
- X[13] = IVAL(data, 52);
- X[14] = IVAL(data, 56);
- X[15] = IVAL(data, 60);
-
-#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
-
-#define P(a,b,c,d,k,s,t) a += F(b,c,d) + X[k] + t, a = S(a,s) + b
-
-#define F(x,y,z) (z ^ (x & (y ^ z)))
-
- P(A, B, C, D, 0, 7, 0xD76AA478);
- P(D, A, B, C, 1, 12, 0xE8C7B756);
- P(C, D, A, B, 2, 17, 0x242070DB);
- P(B, C, D, A, 3, 22, 0xC1BDCEEE);
- P(A, B, C, D, 4, 7, 0xF57C0FAF);
- P(D, A, B, C, 5, 12, 0x4787C62A);
- P(C, D, A, B, 6, 17, 0xA8304613);
- P(B, C, D, A, 7, 22, 0xFD469501);
- P(A, B, C, D, 8, 7, 0x698098D8);
- P(D, A, B, C, 9, 12, 0x8B44F7AF);
- P(C, D, A, B, 10, 17, 0xFFFF5BB1);
- P(B, C, D, A, 11, 22, 0x895CD7BE);
- P(A, B, C, D, 12, 7, 0x6B901122);
- P(D, A, B, C, 13, 12, 0xFD987193);
- P(C, D, A, B, 14, 17, 0xA679438E);
- P(B, C, D, A, 15, 22, 0x49B40821);
-
-#undef F
-#define F(x,y,z) (y ^ (z & (x ^ y)))
-
- P(A, B, C, D, 1, 5, 0xF61E2562);
- P(D, A, B, C, 6, 9, 0xC040B340);
- P(C, D, A, B, 11, 14, 0x265E5A51);
- P(B, C, D, A, 0, 20, 0xE9B6C7AA);
- P(A, B, C, D, 5, 5, 0xD62F105D);
- P(D, A, B, C, 10, 9, 0x02441453);
- P(C, D, A, B, 15, 14, 0xD8A1E681);
- P(B, C, D, A, 4, 20, 0xE7D3FBC8);
- P(A, B, C, D, 9, 5, 0x21E1CDE6);
- P(D, A, B, C, 14, 9, 0xC33707D6);
- P(C, D, A, B, 3, 14, 0xF4D50D87);
- P(B, C, D, A, 8, 20, 0x455A14ED);
- P(A, B, C, D, 13, 5, 0xA9E3E905);
- P(D, A, B, C, 2, 9, 0xFCEFA3F8);
- P(C, D, A, B, 7, 14, 0x676F02D9);
- P(B, C, D, A, 12, 20, 0x8D2A4C8A);
-
-#undef F
-#define F(x,y,z) (x ^ y ^ z)
-
- P(A, B, C, D, 5, 4, 0xFFFA3942);
- P(D, A, B, C, 8, 11, 0x8771F681);
- P(C, D, A, B, 11, 16, 0x6D9D6122);
- P(B, C, D, A, 14, 23, 0xFDE5380C);
- P(A, B, C, D, 1, 4, 0xA4BEEA44);
- P(D, A, B, C, 4, 11, 0x4BDECFA9);
- P(C, D, A, B, 7, 16, 0xF6BB4B60);
- P(B, C, D, A, 10, 23, 0xBEBFBC70);
- P(A, B, C, D, 13, 4, 0x289B7EC6);
- P(D, A, B, C, 0, 11, 0xEAA127FA);
- P(C, D, A, B, 3, 16, 0xD4EF3085);
- P(B, C, D, A, 6, 23, 0x04881D05);
- P(A, B, C, D, 9, 4, 0xD9D4D039);
- P(D, A, B, C, 12, 11, 0xE6DB99E5);
- P(C, D, A, B, 15, 16, 0x1FA27CF8);
- P(B, C, D, A, 2, 23, 0xC4AC5665);
-
-#undef F
-#define F(x,y,z) (y ^ (x | ~z))
-
- P(A, B, C, D, 0, 6, 0xF4292244);
- P(D, A, B, C, 7, 10, 0x432AFF97);
- P(C, D, A, B, 14, 15, 0xAB9423A7);
- P(B, C, D, A, 5, 21, 0xFC93A039);
- P(A, B, C, D, 12, 6, 0x655B59C3);
- P(D, A, B, C, 3, 10, 0x8F0CCC92);
- P(C, D, A, B, 10, 15, 0xFFEFF47D);
- P(B, C, D, A, 1, 21, 0x85845DD1);
- P(A, B, C, D, 8, 6, 0x6FA87E4F);
- P(D, A, B, C, 15, 10, 0xFE2CE6E0);
- P(C, D, A, B, 6, 15, 0xA3014314);
- P(B, C, D, A, 13, 21, 0x4E0811A1);
- P(A, B, C, D, 4, 6, 0xF7537E82);
- P(D, A, B, C, 11, 10, 0xBD3AF235);
- P(C, D, A, B, 2, 15, 0x2AD7D2BB);
- P(B, C, D, A, 9, 21, 0xEB86D391);
-
-#undef F
-
- ctx->A += A;
- ctx->B += B;
- ctx->C += C;
- ctx->D += D;
-}
-
-void md5_update(md_context *ctx, const uint8_t *input, uint32_t length)
-{
- uint32_t left, fill;
-
- if (!length)
- return;
-
- left = ctx->totalN & 0x3F;
- fill = CSUM_CHUNK - left;
-
- ctx->totalN += length;
- ctx->totalN &= 0xFFFFFFFF;
-
- if (ctx->totalN < length)
- ctx->totalN2++;
-
- if (left && length >= fill) {
- memcpy(ctx->buffer + left, input, fill);
- md5_process(ctx, ctx->buffer);
- length -= fill;
- input += fill;
- left = 0;
- }
-
- while (length >= CSUM_CHUNK) {
- md5_process(ctx, input);
- length -= CSUM_CHUNK;
- input += CSUM_CHUNK;
- }
-
- if (length)
- memcpy(ctx->buffer + left, input, length);
-}
-
-static uint8_t md5_padding[CSUM_CHUNK] = { 0x80 };
-
-void md5_result(md_context *ctx, uint8_t digest[MD5_DIGEST_LEN])
-{
- uint32_t last, padn;
- uint32_t high, low;
- uint8_t msglen[8];
-
- high = (ctx->totalN >> 29)
- | (ctx->totalN2 << 3);
- low = (ctx->totalN << 3);
-
- SIVAL(msglen, 0, low);
- SIVAL(msglen, 4, high);
-
- last = ctx->totalN & 0x3F;
- padn = last < 56 ? 56 - last : 120 - last;
-
- md5_update(ctx, md5_padding, padn);
- md5_update(ctx, msglen, 8);
-
- SIVAL(digest, 0, ctx->A);
- SIVAL(digest, 4, ctx->B);
- SIVAL(digest, 8, ctx->C);
- SIVAL(digest, 12, ctx->D);
-}
-
-void get_md5(uint8_t *out, const uint8_t *input, int n)
-{
- md_context ctx;
- md5_begin(&ctx);
- md5_update(&ctx, input, n);
- md5_result(&ctx, out);
-}
-
-#ifdef TEST_MD5
-
-#include <stdlib.h>
-#include <stdio.h>
-
-/*
- * those are the standard RFC 1321 test vectors
- */
-
-static struct {
- char *str, *md5;
-} tests[] = {
- { "",
- "d41d8cd98f00b204e9800998ecf8427e" },
- { "a",
- "0cc175b9c0f1b6a831c399e269772661" },
- { "abc",
- "900150983cd24fb0d6963f7d28e17f72" },
- { "message digest",
- "f96b697d7cb7938d525a2f31aaf161d0" },
- { "abcdefghijklmnopqrstuvwxyz",
- "c3fcd3d76192e4007dfb496cca67e13b" },
- { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
- "d174ab98d277d9f5a5611c2c9f419d9f" },
- { "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
- "57edf4a22be3c955ac49da2e2107b67a" },
- { NULL, NULL }
-};
-
-int main(int argc, char *argv[])
-{
- FILE *f;
- int i, j;
- char output[33];
- md_context ctx;
- uint8_t buf[1000];
- uint8_t md5sum[MD5_DIGEST_LEN];
-
- if (argc < 2) {
- printf("\nMD5 Validation Tests:\n\n");
-
- for (i = 0; tests[i].str; i++) {
- char *str = tests[i].str;
- char *chk = tests[i].md5;
-
- printf(" Test %d ", i + 1);
-
- get_md5(md5sum, str, strlen(str));
-
- for (j = 0; j < MD5_DIGEST_LEN; j++)
- sprintf(output + j * 2, "%02x", md5sum[j]);
-
- if (memcmp(output, chk, 32)) {
- printf("failed!\n");
- return 1;
- }
-
- printf("passed.\n");
- }
-
- printf("\n");
- return 0;
- }
-
- while (--argc) {
- if (!(f = fopen(*++argv, "rb"))) {
- perror("fopen");
- return 1;
- }
-
- md5_begin(&ctx);
-
- while ((i = fread(buf, 1, sizeof buf, f)) > 0)
- md5_update(&ctx, buf, i);
-
- fclose(f);
-
- md5_result(&ctx, md5sum);
-
- for (j = 0; j < MD5_DIGEST_LEN; j++)
- printf("%02x", md5sum[j]);
-
- printf(" %s\n", *argv);
- }
-
- return 0;
-}
-
-#endif
diff --git a/contrib/md5/md5.h b/contrib/md5/md5.h
deleted file mode 100644
index ba8f08dbc..000000000
--- a/contrib/md5/md5.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* rsync-3.0.6/byteorder.h */
-
-/*
- * Simple byteorder handling.
- *
- * Copyright (C) 1992-1995 Andrew Tridgell
- * Copyright (C) 2007-2008 Wayne Davison
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, visit the http://fsf.org website.
- */
-
-#undef CAREFUL_ALIGNMENT
-
-/* We know that the x86 can handle misalignment and has the same
- * byte order (LSB-first) as the 32-bit numbers we transmit. */
-
-#ifdef __i386__
-#define CAREFUL_ALIGNMENT 0
-#endif
-
-#ifndef CAREFUL_ALIGNMENT
-#define CAREFUL_ALIGNMENT 1
-#endif
-
-#define CVAL(buf,pos) (((unsigned char *)(buf))[pos])
-#define UVAL(buf,pos) ((uint32_t)CVAL(buf,pos))
-#define SCVAL(buf,pos,val) (CVAL(buf,pos) = (val))
-
-#if CAREFUL_ALIGNMENT
-#define PVAL(buf,pos) (UVAL(buf,pos)|UVAL(buf,(pos)+1)<<8)
-#define IVAL(buf,pos) (PVAL(buf,pos)|PVAL(buf,(pos)+2)<<16)
-#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
-#define SIVALX(buf,pos,val) (SSVALX(buf,pos,val&0xFFFF),SSVALX(buf,pos+2,val>>16))
-#define SIVAL(buf,pos,val) SIVALX((buf),(pos),((uint32_t)(val)))
-#else
-
-/* this handles things for architectures like the 386 that can handle
- alignment errors */
-
-/*
- WARNING: This section is dependent on the length of int32
- being correct. set CAREFUL_ALIGNMENT if it is not.
-*/
-
-#define IVAL(buf,pos) (*(uint32_t *)((char *)(buf) + (pos)))
-#define SIVAL(buf,pos,val) IVAL(buf,pos)=((uint32_t)(val))
-#endif
-
-/* The include file for both the MD4 and MD5 routines. */
-
-#define MD5_DIGEST_LEN 16
-#define MAX_DIGEST_LEN MD5_DIGEST_LEN
-
-#define CSUM_CHUNK 64
-
-typedef struct {
- uint32_t A, B, C, D;
- uint32_t totalN; /* bit count, lower 32 bits */
- uint32_t totalN2; /* bit count, upper 32 bits */
- uint8_t buffer[CSUM_CHUNK];
-} md_context;
-
-void md5_begin(md_context *ctx);
-void md5_update(md_context *ctx, const uint8_t *input, uint32_t length);
-void md5_result(md_context *ctx, uint8_t digest[MD5_DIGEST_LEN]);
-
-void get_md5(uint8_t digest[MD5_DIGEST_LEN], const uint8_t *input, int n);
diff --git a/contrib/qemu/block.c b/contrib/qemu/block.c
new file mode 100644
index 000000000..b56024113
--- /dev/null
+++ b/contrib/qemu/block.c
@@ -0,0 +1,4604 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor/monitor.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "qemu/module.h"
+#include "qapi/qmp/qjson.h"
+#include "sysemu/sysemu.h"
+#include "qemu/notify.h"
+#include "block/coroutine.h"
+#include "qmp-commands.h"
+#include "qemu/timer.h"
+
+#ifdef CONFIG_BSD
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/queue.h>
+#ifndef __DragonFly__
+#include <sys/disk.h>
+#endif
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+} BdrvRequestFlags;
+
+static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
+static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, int64_t *wait);
+
+static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
+ QTAILQ_HEAD_INITIALIZER(bdrv_states);
+
+static QLIST_HEAD(, BlockDriver) bdrv_drivers =
+ QLIST_HEAD_INITIALIZER(bdrv_drivers);
+
+/* If non-zero, use only whitelisted block drivers */
+static int use_bdrv_whitelist;
+
+#ifdef _WIN32
+static int is_windows_drive_prefix(const char *filename)
+{
+ return (((filename[0] >= 'a' && filename[0] <= 'z') ||
+ (filename[0] >= 'A' && filename[0] <= 'Z')) &&
+ filename[1] == ':');
+}
+
+int is_windows_drive(const char *filename)
+{
+ if (is_windows_drive_prefix(filename) &&
+ filename[2] == '\0')
+ return 1;
+ if (strstart(filename, "\\\\.\\", NULL) ||
+ strstart(filename, "//./", NULL))
+ return 1;
+ return 0;
+}
+#endif
+
+/* throttling disk I/O limits */
+void bdrv_io_limits_disable(BlockDriverState *bs)
+{
+ bs->io_limits_enabled = false;
+
+ while (qemu_co_queue_next(&bs->throttled_reqs));
+
+ if (bs->block_timer) {
+ qemu_del_timer(bs->block_timer);
+ qemu_free_timer(bs->block_timer);
+ bs->block_timer = NULL;
+ }
+
+ bs->slice_start = 0;
+ bs->slice_end = 0;
+}
+
+static void bdrv_block_timer(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+
+ qemu_co_queue_next(&bs->throttled_reqs);
+}
+
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+ qemu_co_queue_init(&bs->throttled_reqs);
+ bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+ bs->io_limits_enabled = true;
+}
+
+bool bdrv_io_limits_enabled(BlockDriverState *bs)
+{
+ BlockIOLimit *io_limits = &bs->io_limits;
+ return io_limits->bps[BLOCK_IO_LIMIT_READ]
+ || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
+ || io_limits->iops[BLOCK_IO_LIMIT_READ]
+ || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
+}
+
+static void bdrv_io_limits_intercept(BlockDriverState *bs,
+ bool is_write, int nb_sectors)
+{
+ int64_t wait_time = -1;
+
+ if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+ qemu_co_queue_wait(&bs->throttled_reqs);
+ }
+
+    /* We aim to preserve each request's timing, in FIFO order. The next
+     * throttled requests will not be dequeued until the current request is
+     * allowed to be serviced. So if the current request still exceeds the
+     * limits, it is re-inserted at the head of the queue, and all requests
+     * that follow it remain in the throttled_reqs queue.
+     */
+
+ while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
+ }
+
+ qemu_co_queue_next(&bs->throttled_reqs);
+}
+
+/* check if the path starts with "<protocol>:" */
+static int path_has_protocol(const char *path)
+{
+ const char *p;
+
+#ifdef _WIN32
+ if (is_windows_drive(path) ||
+ is_windows_drive_prefix(path)) {
+ return 0;
+ }
+ p = path + strcspn(path, ":/\\");
+#else
+ p = path + strcspn(path, ":/");
+#endif
+
+ return *p == ':';
+}
+
+int path_is_absolute(const char *path)
+{
+#ifdef _WIN32
+ /* specific case for names like: "\\.\d:" */
+ if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
+ return 1;
+ }
+ return (*path == '/' || *path == '\\');
+#else
+ return (*path == '/');
+#endif
+}
+
+/* If filename is absolute, just copy it to dest. Otherwise, build a
+   path to it by treating it as relative to base_path. URLs are
+   supported. */
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename)
+{
+ const char *p, *p1;
+ int len;
+
+ if (dest_size <= 0)
+ return;
+ if (path_is_absolute(filename)) {
+ pstrcpy(dest, dest_size, filename);
+ } else {
+ p = strchr(base_path, ':');
+ if (p)
+ p++;
+ else
+ p = base_path;
+ p1 = strrchr(base_path, '/');
+#ifdef _WIN32
+ {
+ const char *p2;
+ p2 = strrchr(base_path, '\\');
+ if (!p1 || p2 > p1)
+ p1 = p2;
+ }
+#endif
+ if (p1)
+ p1++;
+ else
+ p1 = base_path;
+ if (p1 > p)
+ p = p1;
+ len = p - base_path;
+ if (len > dest_size - 1)
+ len = dest_size - 1;
+ memcpy(dest, base_path, len);
+ dest[len] = '\0';
+ pstrcat(dest, dest_size, filename);
+ }
+}
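
A hypothetical caller illustrating the comment above; it assumes linking against this file together with QEMU's string helpers (pstrcpy/pstrcat).

#include <stdio.h>

void path_combine(char *dest, int dest_size,
                  const char *base_path, const char *filename); /* defined above */

int main(void)
{
    char dest[256];

    /* Relative backing file: resolved next to the base image. */
    path_combine(dest, sizeof(dest), "/images/base.qcow2", "backing.qcow2");
    printf("%s\n", dest);   /* "/images/backing.qcow2" */

    /* Absolute backing file: copied through unchanged. */
    path_combine(dest, sizeof(dest), "/images/base.qcow2", "/other/backing.qcow2");
    printf("%s\n", dest);   /* "/other/backing.qcow2" */

    return 0;
}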
+
+void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
+{
+ if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
+ pstrcpy(dest, sz, bs->backing_file);
+ } else {
+ path_combine(dest, sz, bs->filename, bs->backing_file);
+ }
+}
+
+void bdrv_register(BlockDriver *bdrv)
+{
+ /* Block drivers without coroutine functions need emulation */
+ if (!bdrv->bdrv_co_readv) {
+ bdrv->bdrv_co_readv = bdrv_co_readv_em;
+ bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+ /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
+ * the block driver lacks aio we need to emulate that too.
+ */
+ if (!bdrv->bdrv_aio_readv) {
+ /* add AIO emulation layer */
+ bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+ }
+ }
+
+ QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
+}
+
+/* create a new block device (by default it is empty) */
+BlockDriverState *bdrv_new(const char *device_name)
+{
+ BlockDriverState *bs;
+
+ bs = g_malloc0(sizeof(BlockDriverState));
+ pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
+ if (device_name[0] != '\0') {
+ QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
+ }
+ bdrv_iostatus_disable(bs);
+ notifier_list_init(&bs->close_notifiers);
+ notifier_with_return_list_init(&bs->before_write_notifiers);
+
+ return bs;
+}
+
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
+{
+ notifier_list_add(&bs->close_notifiers, notify);
+}
+
+BlockDriver *bdrv_find_format(const char *format_name)
+{
+ BlockDriver *drv1;
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (!strcmp(drv1->format_name, format_name)) {
+ return drv1;
+ }
+ }
+ return NULL;
+}
+
+static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
+{
+ static const char *whitelist_rw[] = {
+ CONFIG_BDRV_RW_WHITELIST
+ };
+ static const char *whitelist_ro[] = {
+ CONFIG_BDRV_RO_WHITELIST
+ };
+ const char **p;
+
+ if (!whitelist_rw[0] && !whitelist_ro[0]) {
+ return 1; /* no whitelist, anything goes */
+ }
+
+ for (p = whitelist_rw; *p; p++) {
+ if (!strcmp(drv->format_name, *p)) {
+ return 1;
+ }
+ }
+ if (read_only) {
+ for (p = whitelist_ro; *p; p++) {
+ if (!strcmp(drv->format_name, *p)) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool read_only)
+{
+ BlockDriver *drv = bdrv_find_format(format_name);
+ return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
+}
+
+typedef struct CreateCo {
+ BlockDriver *drv;
+ char *filename;
+ QEMUOptionParameter *options;
+ int ret;
+} CreateCo;
+
+static void coroutine_fn bdrv_create_co_entry(void *opaque)
+{
+ CreateCo *cco = opaque;
+ assert(cco->drv);
+
+ cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
+}
+
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options)
+{
+ int ret;
+
+ Coroutine *co;
+ CreateCo cco = {
+ .drv = drv,
+ .filename = g_strdup(filename),
+ .options = options,
+ .ret = NOT_DONE,
+ };
+
+ if (!drv->bdrv_create) {
+ ret = -ENOTSUP;
+ goto out;
+ }
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_create_co_entry(&cco);
+ } else {
+ co = qemu_coroutine_create(bdrv_create_co_entry);
+ qemu_coroutine_enter(co, &cco);
+ while (cco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ ret = cco.ret;
+
+out:
+ g_free(cco.filename);
+ return ret;
+}
+
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
+{
+ BlockDriver *drv;
+
+ drv = bdrv_find_protocol(filename, true);
+ if (drv == NULL) {
+ return -ENOENT;
+ }
+
+ return bdrv_create(drv, filename, options);
+}
+
+/*
+ * Create a uniquely-named empty temporary file.
+ * Return 0 upon success, otherwise a negative errno value.
+ */
+int get_tmp_filename(char *filename, int size)
+{
+#ifdef _WIN32
+ char temp_dir[MAX_PATH];
+ /* GetTempFileName requires that its output buffer (4th param)
+ have length MAX_PATH or greater. */
+ assert(size >= MAX_PATH);
+ return (GetTempPath(MAX_PATH, temp_dir)
+ && GetTempFileName(temp_dir, "qem", 0, filename)
+ ? 0 : -GetLastError());
+#else
+ int fd;
+ const char *tmpdir;
+ tmpdir = getenv("TMPDIR");
+ if (!tmpdir)
+ tmpdir = "/tmp";
+ if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
+ return -EOVERFLOW;
+ }
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ return -errno;
+ }
+ if (close(fd) != 0) {
+ unlink(filename);
+ return -errno;
+ }
+ return 0;
+#endif
+}
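
A hypothetical usage sketch, assuming the program is linked with this file and built on a POSIX host.

#include <stdio.h>
#include <limits.h>   /* PATH_MAX */

int get_tmp_filename(char *filename, int size);   /* defined above */

int main(void)
{
    char tmp[PATH_MAX];

    if (get_tmp_filename(tmp, sizeof(tmp)) < 0) {
        fprintf(stderr, "could not create a temporary file\n");
        return 1;
    }
    printf("%s\n", tmp);   /* e.g. an expanded "/tmp/vl.XXXXXX" name */
    return 0;
}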
+
+/*
+ * Detect host devices. By convention, /dev/cdrom[N] is always
+ * recognized as a host CDROM.
+ */
+static BlockDriver *find_hdev_driver(const char *filename)
+{
+ int score_max = 0, score;
+ BlockDriver *drv = NULL, *d;
+
+ QLIST_FOREACH(d, &bdrv_drivers, list) {
+ if (d->bdrv_probe_device) {
+ score = d->bdrv_probe_device(filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = d;
+ }
+ }
+ }
+
+ return drv;
+}
+
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix)
+{
+ BlockDriver *drv1;
+ char protocol[128];
+ int len;
+ const char *p;
+
+ /* TODO Drivers without bdrv_file_open must be specified explicitly */
+
+ /*
+ * XXX(hch): we really should not let host device detection
+ * override an explicit protocol specification, but moving this
+ * later breaks access to device names with colons in them.
+ * Thanks to the brain-dead persistent naming schemes on udev-
+ * based Linux systems those actually are quite common.
+ */
+ drv1 = find_hdev_driver(filename);
+ if (drv1) {
+ return drv1;
+ }
+
+ if (!path_has_protocol(filename) || !allow_protocol_prefix) {
+ return bdrv_find_format("file");
+ }
+
+ p = strchr(filename, ':');
+ assert(p != NULL);
+ len = p - filename;
+ if (len > sizeof(protocol) - 1)
+ len = sizeof(protocol) - 1;
+ memcpy(protocol, filename, len);
+ protocol[len] = '\0';
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->protocol_name &&
+ !strcmp(drv1->protocol_name, protocol)) {
+ return drv1;
+ }
+ }
+ return NULL;
+}
+
+static int find_image_format(BlockDriverState *bs, const char *filename,
+ BlockDriver **pdrv)
+{
+ int score, score_max;
+ BlockDriver *drv1, *drv;
+ uint8_t buf[2048];
+ int ret = 0;
+
+ /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
+ if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
+ drv = bdrv_find_format("raw");
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
+ }
+
+ ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+ if (ret < 0) {
+ *pdrv = NULL;
+ return ret;
+ }
+
+ score_max = 0;
+ drv = NULL;
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->bdrv_probe) {
+ score = drv1->bdrv_probe(buf, ret, filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = drv1;
+ }
+ }
+ }
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
+}
+
+/**
+ * Set the current 'total_sectors' value
+ */
+static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
+{
+ BlockDriver *drv = bs->drv;
+
+ /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
+ if (bs->sg)
+ return 0;
+
+ /* query actual device if possible, otherwise just trust the hint */
+ if (drv->bdrv_getlength) {
+ int64_t length = drv->bdrv_getlength(bs);
+ if (length < 0) {
+ return length;
+ }
+ hint = length >> BDRV_SECTOR_BITS;
+ }
+
+ bs->total_sectors = hint;
+ return 0;
+}
+
+/**
+ * Set open flags for a given discard mode
+ *
+ * Return 0 on success, -1 if the discard mode was invalid.
+ */
+int bdrv_parse_discard_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_UNMAP;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
+ /* do nothing */
+ } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
+ *flags |= BDRV_O_UNMAP;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Set open flags for a given cache mode
+ *
+ * Return 0 on success, -1 if the cache mode was invalid.
+ */
+int bdrv_parse_cache_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_CACHE_MASK;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
+ *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
+ } else if (!strcmp(mode, "directsync")) {
+ *flags |= BDRV_O_NOCACHE;
+ } else if (!strcmp(mode, "writeback")) {
+ *flags |= BDRV_O_CACHE_WB;
+ } else if (!strcmp(mode, "unsafe")) {
+ *flags |= BDRV_O_CACHE_WB;
+ *flags |= BDRV_O_NO_FLUSH;
+ } else if (!strcmp(mode, "writethrough")) {
+ /* this is the default */
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * The copy-on-read flag is actually a reference count so multiple users may
+ * use the feature without worrying about clobbering its previous state.
+ * Copy-on-read stays enabled until all users have called to disable it.
+ */
+void bdrv_enable_copy_on_read(BlockDriverState *bs)
+{
+ bs->copy_on_read++;
+}
+
+void bdrv_disable_copy_on_read(BlockDriverState *bs)
+{
+ assert(bs->copy_on_read > 0);
+ bs->copy_on_read--;
+}
+
+static int bdrv_open_flags(BlockDriverState *bs, int flags)
+{
+ int open_flags = flags | BDRV_O_CACHE_WB;
+
+ /*
+ * Clear flags that are internal to the block layer before opening the
+ * image.
+ */
+ open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+ /*
+ * Snapshots should be writable.
+ */
+ if (bs->is_temporary) {
+ open_flags |= BDRV_O_RDWR;
+ }
+
+ return open_flags;
+}
+
+/*
+ * Common part for opening disk images and files
+ *
+ * Removes all processed options from *options.
+ */
+static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+ QDict *options, int flags, BlockDriver *drv)
+{
+ int ret, open_flags;
+ const char *filename;
+
+ assert(drv != NULL);
+ assert(bs->file == NULL);
+ assert(options != NULL && bs->options != options);
+
+ if (file != NULL) {
+ filename = file->filename;
+ } else {
+ filename = qdict_get_try_str(options, "filename");
+ }
+
+ trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
+
+    /* bdrv_open() was called directly with a protocol as drv. This layer is
+     * already opened, so assign it to bs (while file becomes a closed
+     * BlockDriverState) and return immediately. */
+ if (file != NULL && drv->bdrv_file_open) {
+ bdrv_swap(file, bs);
+ return 0;
+ }
+
+ bs->open_flags = flags;
+ bs->buffer_alignment = 512;
+ open_flags = bdrv_open_flags(bs, flags);
+ bs->read_only = !(open_flags & BDRV_O_RDWR);
+
+ if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
+ return -ENOTSUP;
+ }
+
+ assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
+ if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
+ bdrv_enable_copy_on_read(bs);
+ }
+
+ if (filename != NULL) {
+ pstrcpy(bs->filename, sizeof(bs->filename), filename);
+ } else {
+ bs->filename[0] = '\0';
+ }
+
+ bs->drv = drv;
+ bs->opaque = g_malloc0(drv->instance_size);
+
+ bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
+
+ /* Open the image, either directly or using a protocol */
+ if (drv->bdrv_file_open) {
+ assert(file == NULL);
+ assert(drv->bdrv_parse_filename || filename != NULL);
+ ret = drv->bdrv_file_open(bs, options, open_flags);
+ } else {
+ if (file == NULL) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
+ "block driver for the protocol level",
+ drv->format_name);
+ ret = -EINVAL;
+ goto free_and_fail;
+ }
+ assert(file != NULL);
+ bs->file = file;
+ ret = drv->bdrv_open(bs, options, open_flags);
+ }
+
+ if (ret < 0) {
+ goto free_and_fail;
+ }
+
+ ret = refresh_total_sectors(bs, bs->total_sectors);
+ if (ret < 0) {
+ goto free_and_fail;
+ }
+
+#ifndef _WIN32
+ if (bs->is_temporary) {
+ assert(filename != NULL);
+ unlink(filename);
+ }
+#endif
+ return 0;
+
+free_and_fail:
+ bs->file = NULL;
+ g_free(bs->opaque);
+ bs->opaque = NULL;
+ bs->drv = NULL;
+ return ret;
+}
+
+/*
+ * Opens a file using a protocol (file, host_device, nbd, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
+ */
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags)
+{
+ BlockDriverState *bs;
+ BlockDriver *drv;
+ const char *drvname;
+ bool allow_protocol_prefix = false;
+ int ret;
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs = bdrv_new("");
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* Fetch the file name from the options QDict if necessary */
+ if (!filename) {
+ filename = qdict_get_try_str(options, "filename");
+ } else if (filename && !qdict_haskey(options, "filename")) {
+ qdict_put(options, "filename", qstring_from_str(filename));
+ allow_protocol_prefix = true;
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
+ "'filename' options at the same time");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* Find the right block driver */
+ drvname = qdict_get_try_str(options, "driver");
+ if (drvname) {
+ drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
+ qdict_del(options, "driver");
+ } else if (filename) {
+ drv = bdrv_find_protocol(filename, allow_protocol_prefix);
+ if (!drv) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
+ }
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "Must specify either driver or file");
+ drv = NULL;
+ }
+
+ if (!drv) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+ /* Parse the filename and open it */
+ if (drv->bdrv_parse_filename && filename) {
+ Error *local_err = NULL;
+ drv->bdrv_parse_filename(filename, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ qdict_del(options, "filename");
+ } else if (!drv->bdrv_parse_filename && !filename) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "The '%s' block driver requires a file name",
+ drv->format_name);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = bdrv_open_common(bs, NULL, options, flags, drv);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
+ "support the option '%s'",
+ drv->format_name, entry->key);
+ ret = -EINVAL;
+ goto fail;
+ }
+ QDECREF(options);
+
+ bs->growable = 1;
+ *pbs = bs;
+ return 0;
+
+fail:
+ QDECREF(options);
+ if (!bs->drv) {
+ QDECREF(bs->options);
+ }
+ bdrv_delete(bs);
+ return ret;
+}
+
+/*
+ * Opens the backing file for a BlockDriverState if not yet open
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict is transferred to this
+ * function (even on failure), so if the caller intends to reuse the dictionary,
+ * it needs to use QINCREF() before calling bdrv_file_open.
+ */
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
+{
+ char backing_filename[PATH_MAX];
+ int back_flags, ret;
+ BlockDriver *back_drv = NULL;
+
+ if (bs->backing_hd != NULL) {
+ QDECREF(options);
+ return 0;
+ }
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->open_flags &= ~BDRV_O_NO_BACKING;
+ if (qdict_haskey(options, "file.filename")) {
+ backing_filename[0] = '\0';
+ } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
+ QDECREF(options);
+ return 0;
+ }
+
+ bs->backing_hd = bdrv_new("");
+ bdrv_get_full_backing_filename(bs, backing_filename,
+ sizeof(backing_filename));
+
+ if (bs->backing_format[0] != '\0') {
+ back_drv = bdrv_find_format(bs->backing_format);
+ }
+
+ /* backing files always opened read-only */
+ back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
+
+ ret = bdrv_open(bs->backing_hd,
+ *backing_filename ? backing_filename : NULL, options,
+ back_flags, back_drv);
+ if (ret < 0) {
+ bdrv_delete(bs->backing_hd);
+ bs->backing_hd = NULL;
+ bs->open_flags |= BDRV_O_NO_BACKING;
+ return ret;
+ }
+ return 0;
+}
+
+static void extract_subqdict(QDict *src, QDict **dst, const char *start)
+{
+ const QDictEntry *entry, *next;
+ const char *p;
+
+ *dst = qdict_new();
+ entry = qdict_first(src);
+
+ while (entry != NULL) {
+ next = qdict_next(src, entry);
+ if (strstart(entry->key, start, &p)) {
+ qobject_incref(entry->value);
+ qdict_put_obj(*dst, p, entry->value);
+ qdict_del(src, entry->key);
+ }
+ entry = next;
+ }
+}
+
+/*
+ * Opens a disk image (raw, qcow2, vmdk, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_open.
+ */
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv)
+{
+ int ret;
+ /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
+ char tmp_filename[PATH_MAX + 1];
+ BlockDriverState *file = NULL;
+ QDict *file_options = NULL;
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* For snapshot=on, create a temporary qcow2 overlay */
+ if (flags & BDRV_O_SNAPSHOT) {
+ BlockDriverState *bs1;
+ int64_t total_size;
+ BlockDriver *bdrv_qcow2;
+ QEMUOptionParameter *create_options;
+ char backing_filename[PATH_MAX];
+
+ if (qdict_size(options) != 0) {
+ error_report("Can't use snapshot=on with driver-specific options");
+ ret = -EINVAL;
+ goto fail;
+ }
+ assert(filename != NULL);
+
+ /* if snapshot, we create a temporary backing file and open it
+ instead of opening 'filename' directly */
+
+ /* if there is a backing file, use it */
+ bs1 = bdrv_new("");
+ ret = bdrv_open(bs1, filename, NULL, 0, drv);
+ if (ret < 0) {
+ bdrv_delete(bs1);
+ goto fail;
+ }
+ total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
+
+ bdrv_delete(bs1);
+
+ ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Real path is meaningless for protocols */
+ if (path_has_protocol(filename)) {
+ snprintf(backing_filename, sizeof(backing_filename),
+ "%s", filename);
+ } else if (!realpath(filename, backing_filename)) {
+ ret = -errno;
+ goto fail;
+ }
+
+ bdrv_qcow2 = bdrv_find_format("qcow2");
+ create_options = parse_option_parameters("", bdrv_qcow2->create_options,
+ NULL);
+
+ set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
+ backing_filename);
+ if (drv) {
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
+ drv->format_name);
+ }
+
+ ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
+ free_option_parameters(create_options);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ filename = tmp_filename;
+ drv = bdrv_qcow2;
+ bs->is_temporary = 1;
+ }
+
+ /* Open image file without format layer */
+ if (flags & BDRV_O_RDWR) {
+ flags |= BDRV_O_ALLOW_RDWR;
+ }
+
+ extract_subqdict(options, &file_options, "file.");
+
+ ret = bdrv_file_open(&file, filename, file_options,
+ bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Find the right image format driver */
+ if (!drv) {
+ ret = find_image_format(file, filename, &drv);
+ }
+
+ if (!drv) {
+ goto unlink_and_fail;
+ }
+
+ /* Open the image */
+ ret = bdrv_open_common(bs, file, options, flags, drv);
+ if (ret < 0) {
+ goto unlink_and_fail;
+ }
+
+ if (bs->file != file) {
+ bdrv_delete(file);
+ file = NULL;
+ }
+
+ /* If there is a backing file, use it */
+ if ((flags & BDRV_O_NO_BACKING) == 0) {
+ QDict *backing_options;
+
+ extract_subqdict(options, &backing_options, "backing.");
+ ret = bdrv_open_backing_file(bs, backing_options);
+ if (ret < 0) {
+ goto close_and_fail;
+ }
+ }
+
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
+ "device '%s' doesn't support the option '%s'",
+ drv->format_name, bs->device_name, entry->key);
+
+ ret = -EINVAL;
+ goto close_and_fail;
+ }
+ QDECREF(options);
+
+ if (!bdrv_key_required(bs)) {
+ bdrv_dev_change_media_cb(bs, true);
+ }
+
+ /* throttling disk I/O limits */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_enable(bs);
+ }
+
+ return 0;
+
+unlink_and_fail:
+ if (file != NULL) {
+ bdrv_delete(file);
+ }
+ if (bs->is_temporary) {
+ unlink(filename);
+ }
+fail:
+ QDECREF(bs->options);
+ QDECREF(options);
+ bs->options = NULL;
+ return ret;
+
+close_and_fail:
+ bdrv_close(bs);
+ QDECREF(options);
+ return ret;
+}
+
+typedef struct BlockReopenQueueEntry {
+ bool prepared;
+ BDRVReopenState state;
+ QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
+} BlockReopenQueueEntry;
+
+/*
+ * Adds a BlockDriverState to a simple queue for an atomic, transactional
+ * reopen of multiple devices.
+ *
+ * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
+ * already performed, or alternatively may be NULL, in which case a new
+ * BlockReopenQueue will be created and initialized. This newly created
+ * BlockReopenQueue should be passed back in for subsequent calls that are
+ * intended to be part of the same atomic 'set'.
+ *
+ * bs is the BlockDriverState to add to the reopen queue.
+ *
+ * flags contains the open flags for the associated bs
+ *
+ * returns a pointer to bs_queue, which is either the newly allocated
+ * bs_queue, or the existing bs_queue being used.
+ *
+ */
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags)
+{
+ assert(bs != NULL);
+
+ BlockReopenQueueEntry *bs_entry;
+ if (bs_queue == NULL) {
+ bs_queue = g_new0(BlockReopenQueue, 1);
+ QSIMPLEQ_INIT(bs_queue);
+ }
+
+ if (bs->file) {
+ bdrv_reopen_queue(bs_queue, bs->file, flags);
+ }
+
+ bs_entry = g_new0(BlockReopenQueueEntry, 1);
+ QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+
+ bs_entry->state.bs = bs;
+ bs_entry->state.flags = flags;
+
+ return bs_queue;
+}
+
+/*
+ * Reopen multiple BlockDriverStates atomically & transactionally.
+ *
+ * The queue passed in (bs_queue) must have been built up previously
+ * via bdrv_reopen_queue().
+ *
+ * Reopens all BDS specified in the queue, with the appropriate
+ * flags. All devices are prepared for reopen, and failure of any
+ * device will cause all device changes to be abandoned, and intermediate
+ * data cleaned up.
+ *
+ * If all devices prepare successfully, then the changes are committed
+ * to all devices.
+ *
+ */
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+{
+ int ret = -1;
+ BlockReopenQueueEntry *bs_entry, *next;
+ Error *local_err = NULL;
+
+ assert(bs_queue != NULL);
+
+ bdrv_drain_all();
+
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
+ error_propagate(errp, local_err);
+ goto cleanup;
+ }
+ bs_entry->prepared = true;
+ }
+
+ /* If we reach this point, we have success and just need to apply the
+ * changes
+ */
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ bdrv_reopen_commit(&bs_entry->state);
+ }
+
+ ret = 0;
+
+cleanup:
+ QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ if (ret && bs_entry->prepared) {
+ bdrv_reopen_abort(&bs_entry->state);
+ }
+ g_free(bs_entry);
+ }
+ g_free(bs_queue);
+ return ret;
+}
+
+
+/* Reopen a single BlockDriverState with the specified flags. */
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+{
+ int ret = -1;
+ Error *local_err = NULL;
+ BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+
+ ret = bdrv_reopen_multiple(queue, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ }
+ return ret;
+}
+
+
+/*
+ * Prepares a BlockDriverState for reopen. All changes are staged in the
+ * 'opaque' field of the BDRVReopenState, which is used and allocated by
+ * the block driver's .bdrv_reopen_prepare() callback.
+ *
+ * bs is the BlockDriverState to reopen
+ * flags are the new open flags
+ * queue is the reopen queue
+ *
+ * Returns 0 on success, non-zero on error. On error errp will be set
+ * as well.
+ *
+ * On failure, bdrv_reopen_abort() will be called to clean up any data.
+ * It is the responsibility of the caller to then call the abort() or
+ * commit() for any other BDS that have been left in a prepare() state
+ *
+ */
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+ Error **errp)
+{
+ int ret = -1;
+ Error *local_err = NULL;
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ assert(reopen_state->bs->drv != NULL);
+ drv = reopen_state->bs->drv;
+
+ /* if we are to stay read-only, do not allow permission change
+ * to r/w */
+ if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
+ reopen_state->flags & BDRV_O_RDWR) {
+ error_set(errp, QERR_DEVICE_IS_READ_ONLY,
+ reopen_state->bs->device_name);
+ goto error;
+ }
+
+
+ ret = bdrv_flush(reopen_state->bs);
+ if (ret) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
+ strerror(-ret));
+ goto error;
+ }
+
+ if (drv->bdrv_reopen_prepare) {
+ ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
+ if (ret) {
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ } else {
+ error_setg(errp, "failed while preparing to reopen image '%s'",
+ reopen_state->bs->filename);
+ }
+ goto error;
+ }
+ } else {
+ /* It is currently mandatory to have a bdrv_reopen_prepare()
+ * handler for each supported drv. */
+ error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+ drv->format_name, reopen_state->bs->device_name,
+ "reopening of file");
+ ret = -1;
+ goto error;
+ }
+
+ ret = 0;
+
+error:
+ return ret;
+}
+
+/*
+ * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
+ * makes them final by swapping the staging BlockDriverState contents into
+ * the active BlockDriverState contents.
+ */
+void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+{
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ drv = reopen_state->bs->drv;
+ assert(drv != NULL);
+
+ /* If there are any driver level actions to take */
+ if (drv->bdrv_reopen_commit) {
+ drv->bdrv_reopen_commit(reopen_state);
+ }
+
+ /* set BDS specific flags now */
+ reopen_state->bs->open_flags = reopen_state->flags;
+ reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
+ BDRV_O_CACHE_WB);
+ reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+}
+
+/*
+ * Abort the reopen, and delete and free the staged changes in
+ * reopen_state
+ */
+void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+{
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ drv = reopen_state->bs->drv;
+ assert(drv != NULL);
+
+ if (drv->bdrv_reopen_abort) {
+ drv->bdrv_reopen_abort(reopen_state);
+ }
+}
+
+
+void bdrv_close(BlockDriverState *bs)
+{
+ if (bs->job) {
+ block_job_cancel_sync(bs->job);
+ }
+ bdrv_drain_all(); /* complete I/O */
+ bdrv_flush(bs);
+ bdrv_drain_all(); /* in case flush left pending I/O */
+ notifier_list_notify(&bs->close_notifiers, bs);
+
+ if (bs->drv) {
+ if (bs->backing_hd) {
+ bdrv_delete(bs->backing_hd);
+ bs->backing_hd = NULL;
+ }
+ bs->drv->bdrv_close(bs);
+ g_free(bs->opaque);
+#ifdef _WIN32
+ if (bs->is_temporary) {
+ unlink(bs->filename);
+ }
+#endif
+ bs->opaque = NULL;
+ bs->drv = NULL;
+ bs->copy_on_read = 0;
+ bs->backing_file[0] = '\0';
+ bs->backing_format[0] = '\0';
+ bs->total_sectors = 0;
+ bs->encrypted = 0;
+ bs->valid_key = 0;
+ bs->sg = 0;
+ bs->growable = 0;
+ QDECREF(bs->options);
+ bs->options = NULL;
+
+ if (bs->file != NULL) {
+ bdrv_delete(bs->file);
+ bs->file = NULL;
+ }
+ }
+
+ bdrv_dev_change_media_cb(bs, false);
+
+    /* throttling disk I/O limits */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_disable(bs);
+ }
+}
+
+void bdrv_close_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bdrv_close(bs);
+ }
+}
+
+/*
+ * Wait for pending requests to complete across all BlockDriverStates
+ *
+ * This function does not flush data to disk, use bdrv_flush_all() for that
+ * after calling this function.
+ *
+ * Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a coroutine
+ * can be arbitrarily complex and a constant flow of I/O can come until the
+ * coroutine is complete. Because of this, it is not possible to have a
+ * function to drain a single device's I/O queue.
+ */
+void bdrv_drain_all(void)
+{
+ BlockDriverState *bs;
+ bool busy;
+
+ do {
+ busy = qemu_aio_wait();
+
+ /* FIXME: We do not have timer support here, so this is effectively
+ * a busy wait.
+ */
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+ qemu_co_queue_restart_all(&bs->throttled_reqs);
+ busy = true;
+ }
+ }
+ } while (busy);
+
+ /* If requests are still pending there is a bug somewhere */
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ assert(QLIST_EMPTY(&bs->tracked_requests));
+ assert(qemu_co_queue_empty(&bs->throttled_reqs));
+ }
+}
+
+/* Make a BlockDriverState anonymous by removing it from the bdrv_states list.
+   Also, clear device_name to prevent a double remove. */
+void bdrv_make_anon(BlockDriverState *bs)
+{
+ if (bs->device_name[0] != '\0') {
+ QTAILQ_REMOVE(&bdrv_states, bs, list);
+ }
+ bs->device_name[0] = '\0';
+}
+
+static void bdrv_rebind(BlockDriverState *bs)
+{
+ if (bs->drv && bs->drv->bdrv_rebind) {
+ bs->drv->bdrv_rebind(bs);
+ }
+}
+
+static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
+ BlockDriverState *bs_src)
+{
+ /* move some fields that need to stay attached to the device */
+ bs_dest->open_flags = bs_src->open_flags;
+
+ /* dev info */
+ bs_dest->dev_ops = bs_src->dev_ops;
+ bs_dest->dev_opaque = bs_src->dev_opaque;
+ bs_dest->dev = bs_src->dev;
+ bs_dest->buffer_alignment = bs_src->buffer_alignment;
+ bs_dest->copy_on_read = bs_src->copy_on_read;
+
+ bs_dest->enable_write_cache = bs_src->enable_write_cache;
+
+ /* i/o timing parameters */
+ bs_dest->slice_start = bs_src->slice_start;
+ bs_dest->slice_end = bs_src->slice_end;
+ bs_dest->slice_submitted = bs_src->slice_submitted;
+ bs_dest->io_limits = bs_src->io_limits;
+ bs_dest->throttled_reqs = bs_src->throttled_reqs;
+ bs_dest->block_timer = bs_src->block_timer;
+ bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
+
+ /* r/w error */
+ bs_dest->on_read_error = bs_src->on_read_error;
+ bs_dest->on_write_error = bs_src->on_write_error;
+
+ /* i/o status */
+ bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
+ bs_dest->iostatus = bs_src->iostatus;
+
+ /* dirty bitmap */
+ bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
+
+ /* job */
+ bs_dest->in_use = bs_src->in_use;
+ bs_dest->job = bs_src->job;
+
+ /* keep the same entry in bdrv_states */
+ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
+ bs_src->device_name);
+ bs_dest->list = bs_src->list;
+}
+
+/*
+ * Swap bs contents for two image chains while they are live,
+ * while keeping required fields on the BlockDriverState that is
+ * actually attached to a device.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_old. Both bs_new and bs_old are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
+{
+ BlockDriverState tmp;
+
+ /* bs_new must be anonymous and shouldn't have anything fancy enabled */
+ assert(bs_new->device_name[0] == '\0');
+ assert(bs_new->dirty_bitmap == NULL);
+ assert(bs_new->job == NULL);
+ assert(bs_new->dev == NULL);
+ assert(bs_new->in_use == 0);
+ assert(bs_new->io_limits_enabled == false);
+ assert(bs_new->block_timer == NULL);
+
+ tmp = *bs_new;
+ *bs_new = *bs_old;
+ *bs_old = tmp;
+
+ /* there are some fields that should not be swapped, move them back */
+ bdrv_move_feature_fields(&tmp, bs_old);
+ bdrv_move_feature_fields(bs_old, bs_new);
+ bdrv_move_feature_fields(bs_new, &tmp);
+
+ /* bs_new shouldn't be in bdrv_states even after the swap! */
+ assert(bs_new->device_name[0] == '\0');
+
+ /* Check a few fields that should remain attached to the device */
+ assert(bs_new->dev == NULL);
+ assert(bs_new->job == NULL);
+ assert(bs_new->in_use == 0);
+ assert(bs_new->io_limits_enabled == false);
+ assert(bs_new->block_timer == NULL);
+
+ bdrv_rebind(bs_new);
+ bdrv_rebind(bs_old);
+}
+
+/*
+ * Add new bs contents at the top of an image chain while the chain is
+ * live, while keeping required fields on the top layer.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_top. Both bs_new and bs_top are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+{
+ bdrv_swap(bs_new, bs_top);
+
+ /* The contents of 'tmp' will become bs_top, as we are
+ * swapping bs_new and bs_top contents. */
+ bs_top->backing_hd = bs_new;
+ bs_top->open_flags &= ~BDRV_O_NO_BACKING;
+ pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
+ bs_new->filename);
+ pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
+ bs_new->drv ? bs_new->drv->format_name : "");
+}
+
+void bdrv_delete(BlockDriverState *bs)
+{
+ assert(!bs->dev);
+ assert(!bs->job);
+ assert(!bs->in_use);
+
+ /* remove from list, if necessary */
+ bdrv_make_anon(bs);
+
+ bdrv_close(bs);
+
+ g_free(bs);
+}
+
+int bdrv_attach_dev(BlockDriverState *bs, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+ if (bs->dev) {
+ return -EBUSY;
+ }
+ bs->dev = dev;
+ bdrv_iostatus_reset(bs);
+ return 0;
+}
+
+/* TODO qdevified devices don't use this, remove when devices are qdevified */
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
+{
+ if (bdrv_attach_dev(bs, dev) < 0) {
+ abort();
+ }
+}
+
+void bdrv_detach_dev(BlockDriverState *bs, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+ assert(bs->dev == dev);
+ bs->dev = NULL;
+ bs->dev_ops = NULL;
+ bs->dev_opaque = NULL;
+ bs->buffer_alignment = 512;
+}
+
+/* TODO change to return DeviceState * when all users are qdevified */
+void *bdrv_get_attached_dev(BlockDriverState *bs)
+{
+ return bs->dev;
+}
+
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+ void *opaque)
+{
+ bs->dev_ops = ops;
+ bs->dev_opaque = opaque;
+}
+
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read)
+{
+ QObject *data;
+ const char *action_str;
+
+ switch (action) {
+ case BDRV_ACTION_REPORT:
+ action_str = "report";
+ break;
+ case BDRV_ACTION_IGNORE:
+ action_str = "ignore";
+ break;
+ case BDRV_ACTION_STOP:
+ action_str = "stop";
+ break;
+ default:
+ abort();
+ }
+
+ data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
+ bdrv->device_name,
+ action_str,
+ is_read ? "read" : "write");
+ monitor_protocol_event(ev, data);
+
+ qobject_decref(data);
+}
+
+static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
+{
+ QObject *data;
+
+ data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
+ bdrv_get_device_name(bs), ejected);
+ monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
+
+ qobject_decref(data);
+}
+
+static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
+{
+ if (bs->dev_ops && bs->dev_ops->change_media_cb) {
+ bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
+ bs->dev_ops->change_media_cb(bs->dev_opaque, load);
+ if (tray_was_closed) {
+ /* tray open */
+ bdrv_emit_qmp_eject_event(bs, true);
+ }
+ if (load) {
+ /* tray close */
+ bdrv_emit_qmp_eject_event(bs, false);
+ }
+ }
+}
+
+bool bdrv_dev_has_removable_media(BlockDriverState *bs)
+{
+ return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
+}
+
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
+{
+ if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
+ bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
+ }
+}
+
+bool bdrv_dev_is_tray_open(BlockDriverState *bs)
+{
+ if (bs->dev_ops && bs->dev_ops->is_tray_open) {
+ return bs->dev_ops->is_tray_open(bs->dev_opaque);
+ }
+ return false;
+}
+
+static void bdrv_dev_resize_cb(BlockDriverState *bs)
+{
+ if (bs->dev_ops && bs->dev_ops->resize_cb) {
+ bs->dev_ops->resize_cb(bs->dev_opaque);
+ }
+}
+
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
+{
+ if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
+ return bs->dev_ops->is_medium_locked(bs->dev_opaque);
+ }
+ return false;
+}
+
+/*
+ * Run consistency checks on an image
+ *
+ * Returns 0 if the check could be completed (it doesn't mean that the image is
+ * free of errors) or -errno when an internal error occurred. The results of the
+ * check are stored in res.
+ */
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
+{
+ if (bs->drv->bdrv_check == NULL) {
+ return -ENOTSUP;
+ }
+
+ memset(res, 0, sizeof(*res));
+ return bs->drv->bdrv_check(bs, res, fix);
+}
+
+#define COMMIT_BUF_SECTORS 2048
+
+/* commit COW file into the raw image */
+int bdrv_commit(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector, total_sectors;
+ int n, ro, open_flags;
+ int ret = 0;
+ uint8_t *buf;
+ char filename[PATH_MAX];
+
+ if (!drv)
+ return -ENOMEDIUM;
+
+ if (!bs->backing_hd) {
+ return -ENOTSUP;
+ }
+
+ if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
+ return -EBUSY;
+ }
+
+ ro = bs->backing_hd->read_only;
+ /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
+ pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
+ open_flags = bs->backing_hd->open_flags;
+
+ if (ro) {
+ if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+ return -EACCES;
+ }
+ }
+
+ total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+
+ for (sector = 0; sector < total_sectors; sector += n) {
+ if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
+
+ if (bdrv_read(bs, sector, buf, n) != 0) {
+ ret = -EIO;
+ goto ro_cleanup;
+ }
+
+ if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
+ ret = -EIO;
+ goto ro_cleanup;
+ }
+ }
+ }
+
+ if (drv->bdrv_make_empty) {
+ ret = drv->bdrv_make_empty(bs);
+ bdrv_flush(bs);
+ }
+
+ /*
+ * Make sure all data we wrote to the backing device is actually
+ * stable on disk.
+ */
+ if (bs->backing_hd)
+ bdrv_flush(bs->backing_hd);
+
+ro_cleanup:
+ g_free(buf);
+
+ if (ro) {
+ /* ignoring error return here */
+ bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
+ }
+
+ return ret;
+}
+
+int bdrv_commit_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ if (bs->drv && bs->backing_hd) {
+ int ret = bdrv_commit(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * Remove an active request from the tracked requests list
+ *
+ * This function should be called when a tracked request is completing.
+ */
+static void tracked_request_end(BdrvTrackedRequest *req)
+{
+ QLIST_REMOVE(req, list);
+ qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+/**
+ * Add an active request to the tracked requests list
+ */
+static void tracked_request_begin(BdrvTrackedRequest *req,
+ BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, bool is_write)
+{
+ *req = (BdrvTrackedRequest){
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .is_write = is_write,
+ .co = qemu_coroutine_self(),
+ };
+
+ qemu_co_queue_init(&req->wait_queue);
+
+ QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+}
+
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_sector_num = sector_num;
+ *cluster_nb_sectors = nb_sectors;
+ } else {
+ int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
+ *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
+ *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
+ nb_sectors, c);
+ }
+}
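+
+/* Worked example (hypothetical numbers): with bdi.cluster_size == 65536,
+ * i.e. 128 sectors of 512 bytes, a request covering sectors [130, 136)
+ * is widened to the cluster-aligned region [128, 256): *cluster_sector_num
+ * becomes 128 and *cluster_nb_sectors becomes 128.
+ */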
+
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
+ int64_t sector_num, int nb_sectors) {
+ /* aaaa bbbb */
+ if (sector_num >= req->sector_num + req->nb_sectors) {
+ return false;
+ }
+ /* bbbb aaaa */
+ if (req->sector_num >= sector_num + nb_sectors) {
+ return false;
+ }
+ return true;
+}
+
+static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ BdrvTrackedRequest *req;
+ int64_t cluster_sector_num;
+ int cluster_nb_sectors;
+ bool retry;
+
+ /* If we touch the same cluster it counts as an overlap. This guarantees
+ * that allocating writes will be serialized and not race with each other
+ * for the same cluster. For example, in copy-on-read it ensures that the
+ * CoR read and write operations are atomic and guest writes cannot
+ * interleave between them.
+ */
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
+
+ do {
+ retry = false;
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
+ if (tracked_request_overlaps(req, cluster_sector_num,
+ cluster_nb_sectors)) {
+ /* Hitting this means there was a reentrant request, for
+ * example, a block driver issuing nested requests. This must
+ * never happen since it means deadlock.
+ */
+ assert(qemu_coroutine_self() != req->co);
+
+ qemu_co_queue_wait(&req->wait_queue);
+ retry = true;
+ break;
+ }
+ }
+ } while (retry);
+}
+
+/*
+ * Return values:
+ * 0 - success
+ * -EINVAL - backing format specified, but no file
+ * -ENOSPC - can't update the backing file because no space is left in the
+ * image file header
+ * -ENOTSUP - format driver doesn't support changing the backing file
+ */
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt)
+{
+ BlockDriver *drv = bs->drv;
+ int ret;
+
+ /* Backing file format doesn't make sense without a backing file */
+ if (backing_fmt && !backing_file) {
+ return -EINVAL;
+ }
+
+ if (drv->bdrv_change_backing_file != NULL) {
+ ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
+ } else {
+ ret = -ENOTSUP;
+ }
+
+ if (ret == 0) {
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+ }
+ return ret;
+}
+
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs)
+{
+ BlockDriverState *overlay = NULL;
+ BlockDriverState *intermediate;
+
+ assert(active != NULL);
+ assert(bs != NULL);
+
+ /* if bs is the same as active, then by definition it has no overlay
+ */
+ if (active == bs) {
+ return NULL;
+ }
+
+ intermediate = active;
+ while (intermediate->backing_hd) {
+ if (intermediate->backing_hd == bs) {
+ overlay = intermediate;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+
+ return overlay;
+}
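+
+/* Example (hypothetical chain): for base <- sn1 <- active,
+ * bdrv_find_overlay(active, base) returns sn1, bdrv_find_overlay(active,
+ * active) returns NULL, and bdrv_find_overlay(active, sn1) returns active.
+ */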
+
+typedef struct BlkIntermediateStates {
+ BlockDriverState *bs;
+ QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in 'bs' can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ * if active == top, that is considered an error
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base)
+{
+ BlockDriverState *intermediate;
+ BlockDriverState *base_bs = NULL;
+ BlockDriverState *new_top_bs = NULL;
+ BlkIntermediateStates *intermediate_state, *next;
+ int ret = -EIO;
+
+ QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+ QSIMPLEQ_INIT(&states_to_delete);
+
+ if (!top->drv || !base->drv) {
+ goto exit;
+ }
+
+ new_top_bs = bdrv_find_overlay(active, top);
+
+ if (new_top_bs == NULL) {
+ /* we could not find the image above 'top'; this is an error */
+ goto exit;
+ }
+
+ /* special case of new_top_bs->backing_hd already pointing to base - nothing
+ * to do, no intermediate images */
+ if (new_top_bs->backing_hd == base) {
+ ret = 0;
+ goto exit;
+ }
+
+ intermediate = top;
+
+ /* now we will go down through the list, and add each BDS we find
+ * into our deletion queue, until we hit the 'base'
+ */
+ while (intermediate) {
+ intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+ intermediate_state->bs = intermediate;
+ QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+ if (intermediate->backing_hd == base) {
+ base_bs = intermediate->backing_hd;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+ if (base_bs == NULL) {
+ /* something went wrong: we did not end at the base. Safely
+ * unravel everything and exit with an error */
+ goto exit;
+ }
+
+ /* success - we can delete the intermediate states, and link top->base */
+ ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+ base_bs->drv ? base_bs->drv->format_name : "");
+ if (ret) {
+ goto exit;
+ }
+ new_top_bs->backing_hd = base_bs;
+
+
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ /* so that bdrv_close() does not recursively close the chain */
+ intermediate_state->bs->backing_hd = NULL;
+ bdrv_delete(intermediate_state->bs);
+ }
+ ret = 0;
+
+exit:
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ g_free(intermediate_state);
+ }
+ return ret;
+}
+
+
+static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
+ size_t size)
+{
+ int64_t len;
+
+ if (!bdrv_is_inserted(bs))
+ return -ENOMEDIUM;
+
+ if (bs->growable)
+ return 0;
+
+ len = bdrv_getlength(bs);
+
+ if (offset < 0)
+ return -EIO;
+
+ if ((offset > len) || (len - offset < size))
+ return -EIO;
+
+ return 0;
+}
+
+static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+ return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+typedef struct RwCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+ QEMUIOVector *qiov;
+ bool is_write;
+ int ret;
+} RwCo;
+
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+
+ if (!rwco->is_write) {
+ rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
+ rwco->nb_sectors, rwco->qiov, 0);
+ } else {
+ rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
+ rwco->nb_sectors, rwco->qiov, 0);
+ }
+}
+
+/*
+ * Process a vectored synchronous request using coroutines
+ */
+static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, bool is_write)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
+ .qiov = qiov,
+ .is_write = is_write,
+ .ret = NOT_DONE,
+ };
+ assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ /**
+ * In a synchronous call context the vCPU is blocked, so the throttling
+ * timer cannot fire; therefore I/O throttling has to be disabled here
+ * if it has been enabled.
+ */
+ if (bs->io_limits_enabled) {
+ fprintf(stderr, "Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.\n", bdrv_get_device_name(bs));
+ bdrv_io_limits_disable(bs);
+ }
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_rw_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_rw_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+ return rwco.ret;
+}
+
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+ int nb_sectors, bool is_write)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+}
+
+/* return < 0 if error. See bdrv_write() for the return codes */
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
+}
+
+/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ bool enabled;
+ int ret;
+
+ enabled = bs->io_limits_enabled;
+ bs->io_limits_enabled = false;
+ ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+ bs->io_limits_enabled = enabled;
+ return ret;
+}
+
+/* Return < 0 if error. Important errors are:
+ -EIO generic I/O error (may happen for all errors)
+ -ENOMEDIUM No media inserted.
+ -EINVAL Invalid sector number or nb_sectors
+ -EACCES Trying to write a read-only device
+*/
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
+}
+
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
+{
+ return bdrv_rwv_co(bs, sector_num, qiov, true);
+}
+
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count1)
+{
+ uint8_t tmp_buf[BDRV_SECTOR_SIZE];
+ int len, nb_sectors, count;
+ int64_t sector_num;
+ int ret;
+
+ count = count1;
+ /* first read to align to sector start */
+ len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
+ if (len > count)
+ len = count;
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ if (len > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
+ count -= len;
+ if (count == 0)
+ return count1;
+ sector_num++;
+ buf += len;
+ }
+
+ /* read the sectors "in place" */
+ nb_sectors = count >> BDRV_SECTOR_BITS;
+ if (nb_sectors > 0) {
+ if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
+ return ret;
+ sector_num += nb_sectors;
+ len = nb_sectors << BDRV_SECTOR_BITS;
+ buf += len;
+ count -= len;
+ }
+
+ /* add data from the last sector */
+ if (count > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ memcpy(buf, tmp_buf, count);
+ }
+ return count1;
+}
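+
+/* Worked example (hypothetical values): bdrv_pread(bs, 300, buf, 1000)
+ * first reads sector 0 and copies the 212 bytes at offsets 300..511, then
+ * reads one full sector (bytes 512..1023) directly into buf, and finally
+ * reads sector 2 into tmp_buf to copy the remaining 276 bytes.
+ */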
+
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+ uint8_t tmp_buf[BDRV_SECTOR_SIZE];
+ int len, nb_sectors, count;
+ int64_t sector_num;
+ int ret;
+
+ count = qiov->size;
+
+ /* first write to align to sector start */
+ len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
+ if (len > count)
+ len = count;
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ if (len > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
+ len);
+ if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ count -= len;
+ if (count == 0)
+ return qiov->size;
+ sector_num++;
+ }
+
+ /* write the sectors "in place" */
+ nb_sectors = count >> BDRV_SECTOR_BITS;
+ if (nb_sectors > 0) {
+ QEMUIOVector qiov_inplace;
+
+ qemu_iovec_init(&qiov_inplace, qiov->niov);
+ qemu_iovec_concat(&qiov_inplace, qiov, len,
+ nb_sectors << BDRV_SECTOR_BITS);
+ ret = bdrv_writev(bs, sector_num, &qiov_inplace);
+ qemu_iovec_destroy(&qiov_inplace);
+ if (ret < 0) {
+ return ret;
+ }
+
+ sector_num += nb_sectors;
+ len = nb_sectors << BDRV_SECTOR_BITS;
+ count -= len;
+ }
+
+ /* add data from the last sector */
+ if (count > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
+ if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ }
+ return qiov->size;
+}
+
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count1)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = count1,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_pwritev(bs, offset, &qiov);
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count)
+{
+ int ret;
+
+ ret = bdrv_pwrite(bs, offset, buf, count);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* No flush needed for cache modes that already do it */
+ if (bs->enable_write_cache) {
+ bdrv_flush(bs);
+ }
+
+ return 0;
+}
+
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ /* Perform I/O through a temporary buffer so that users who scribble over
+ * their read buffer while the operation is in progress do not end up
+ * modifying the image file. This is critical for zero-copy guest I/O
+ * where anything might happen inside guest memory.
+ */
+ void *bounce_buffer;
+
+ BlockDriver *drv = bs->drv;
+ struct iovec iov;
+ QEMUIOVector bounce_qiov;
+ int64_t cluster_sector_num;
+ int cluster_nb_sectors;
+ size_t skip_bytes;
+ int ret;
+
+ /* Cover the entire cluster so no additional backing file I/O is required
+ * when allocating the cluster in the image file.
+ */
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
+
+ trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
+ cluster_sector_num, cluster_nb_sectors);
+
+ iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
+ qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+ ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (drv->bdrv_co_write_zeroes &&
+ buffer_is_zero(bounce_buffer, iov.iov_len)) {
+ ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
+ cluster_nb_sectors);
+ } else {
+ /* This does not change the data on the disk, so it is not necessary
+ * to flush even in cache=writethrough mode.
+ */
+ ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
+ }
+
+ if (ret < 0) {
+ /* It might be okay to ignore write errors for guest requests. If this
+ * is a deliberate copy-on-read then we don't want to ignore the error.
+ * Simply report it in all cases.
+ */
+ goto err;
+ }
+
+ skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
+ nb_sectors * BDRV_SECTOR_SIZE);
+
+err:
+ qemu_vfree(bounce_buffer);
+ return ret;
+}
+
+/*
+ * Handle a read request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+ int ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ /* throttling disk read I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, false, nb_sectors);
+ }
+
+ if (bs->copy_on_read) {
+ flags |= BDRV_REQ_COPY_ON_READ;
+ }
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ bs->copy_on_read_in_flight++;
+ }
+
+ if (bs->copy_on_read_in_flight) {
+ wait_for_overlapping_requests(bs, sector_num, nb_sectors);
+ }
+
+ tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ int pnum;
+
+ ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (!ret || pnum != nb_sectors) {
+ ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ goto out;
+ }
+ }
+
+ ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+
+out:
+ tracked_request_end(&req);
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ bs->copy_on_read_in_flight--;
+ }
+
+ return ret;
+}
+
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+ BDRV_REQ_COPY_ON_READ);
+}
+
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ BlockDriver *drv = bs->drv;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ int ret;
+
+ /* TODO Emulate only part of misaligned requests instead of letting block
+ * drivers return -ENOTSUP and emulate everything */
+
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_write_zeroes) {
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ if (ret != -ENOTSUP) {
+ return ret;
+ }
+ }
+
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ memset(iov.iov_base, 0, iov.iov_len);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+ int ret;
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+ if (bs->read_only) {
+ return -EACCES;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ /* throttling disk write I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, true, nb_sectors);
+ }
+
+ if (bs->copy_on_read_in_flight) {
+ wait_for_overlapping_requests(bs, sector_num, nb_sectors);
+ }
+
+ tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
+
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+
+ if (ret < 0) {
+ /* Do nothing; the write notifier decided to fail this request */
+ } else if (flags & BDRV_REQ_ZERO_WRITE) {
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ } else {
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ }
+
+ if (ret == 0 && !bs->enable_write_cache) {
+ ret = bdrv_co_flush(bs);
+ }
+
+ if (bs->dirty_bitmap) {
+ bdrv_set_dirty(bs, sector_num, nb_sectors);
+ }
+
+ if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
+ bs->wr_highest_sector = sector_num + nb_sectors - 1;
+ }
+
+ tracked_request_end(&req);
+
+ return ret;
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+ BDRV_REQ_ZERO_WRITE);
+}
+
+/**
+ * Truncate file to 'offset' bytes (needed only for file protocols)
+ */
+int bdrv_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BlockDriver *drv = bs->drv;
+ int ret;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (!drv->bdrv_truncate)
+ return -ENOTSUP;
+ if (bs->read_only)
+ return -EACCES;
+ if (bdrv_in_use(bs))
+ return -EBUSY;
+ ret = drv->bdrv_truncate(bs, offset);
+ if (ret == 0) {
+ ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+ bdrv_dev_resize_cb(bs);
+ }
+ return ret;
+}
+
+/**
+ * Length of an allocated file in bytes. Sparse files are counted by their
+ * actual allocated space. Return < 0 on error or if unknown.
+ */
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_get_allocated_file_size) {
+ return drv->bdrv_get_allocated_file_size(bs);
+ }
+ if (bs->file) {
+ return bdrv_get_allocated_file_size(bs->file);
+ }
+ return -ENOTSUP;
+}
+
+/**
+ * Length of a file in bytes. Return < 0 if error or unknown.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+
+ if (bs->growable || bdrv_dev_has_removable_media(bs)) {
+ if (drv->bdrv_getlength) {
+ return drv->bdrv_getlength(bs);
+ }
+ }
+ return bs->total_sectors * BDRV_SECTOR_SIZE;
+}
+
+/* return 0 as number of sectors if no device present or error */
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
+{
+ int64_t length;
+ length = bdrv_getlength(bs);
+ if (length < 0)
+ length = 0;
+ else
+ length = length >> BDRV_SECTOR_BITS;
+ *nb_sectors_ptr = length;
+}
+
+/* throttling disk io limits */
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits)
+{
+ bs->io_limits = *io_limits;
+ bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
+}
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error)
+{
+ bs->on_read_error = on_read_error;
+ bs->on_write_error = on_write_error;
+}
+
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
+{
+ return is_read ? bs->on_read_error : bs->on_write_error;
+}
+
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
+{
+ BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_STOP:
+ return BDRV_ACTION_STOP;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ return BDRV_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ return BDRV_ACTION_IGNORE;
+ default:
+ abort();
+ }
+}
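+
+/* For example, with BLOCKDEV_ON_ERROR_ENOSPC an ENOSPC error stops the VM
+ * (BDRV_ACTION_STOP) while any other error is reported to the guest
+ * (BDRV_ACTION_REPORT).
+ */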
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error)
+{
+ assert(error >= 0);
+ bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
+ if (action == BDRV_ACTION_STOP) {
+ vm_stop(RUN_STATE_IO_ERROR);
+ bdrv_iostatus_set_err(bs, error);
+ }
+}
+
+int bdrv_is_read_only(BlockDriverState *bs)
+{
+ return bs->read_only;
+}
+
+int bdrv_is_sg(BlockDriverState *bs)
+{
+ return bs->sg;
+}
+
+int bdrv_enable_write_cache(BlockDriverState *bs)
+{
+ return bs->enable_write_cache;
+}
+
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
+{
+ bs->enable_write_cache = wce;
+
+ /* so a reopen() will preserve wce */
+ if (wce) {
+ bs->open_flags |= BDRV_O_CACHE_WB;
+ } else {
+ bs->open_flags &= ~BDRV_O_CACHE_WB;
+ }
+}
+
+int bdrv_is_encrypted(BlockDriverState *bs)
+{
+ if (bs->backing_hd && bs->backing_hd->encrypted)
+ return 1;
+ return bs->encrypted;
+}
+
+int bdrv_key_required(BlockDriverState *bs)
+{
+ BlockDriverState *backing_hd = bs->backing_hd;
+
+ if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
+ return 1;
+ return (bs->encrypted && !bs->valid_key);
+}
+
+int bdrv_set_key(BlockDriverState *bs, const char *key)
+{
+ int ret;
+ if (bs->backing_hd && bs->backing_hd->encrypted) {
+ ret = bdrv_set_key(bs->backing_hd, key);
+ if (ret < 0)
+ return ret;
+ if (!bs->encrypted)
+ return 0;
+ }
+ if (!bs->encrypted) {
+ return -EINVAL;
+ } else if (!bs->drv || !bs->drv->bdrv_set_key) {
+ return -ENOMEDIUM;
+ }
+ ret = bs->drv->bdrv_set_key(bs, key);
+ if (ret < 0) {
+ bs->valid_key = 0;
+ } else if (!bs->valid_key) {
+ bs->valid_key = 1;
+ /* call the change callback now, we skipped it on open */
+ bdrv_dev_change_media_cb(bs, true);
+ }
+ return ret;
+}
+
+const char *bdrv_get_format_name(BlockDriverState *bs)
+{
+ return bs->drv ? bs->drv->format_name : NULL;
+}
+
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque)
+{
+ BlockDriver *drv;
+
+ QLIST_FOREACH(drv, &bdrv_drivers, list) {
+ it(opaque, drv->format_name);
+ }
+}
+
+BlockDriverState *bdrv_find(const char *name)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ if (!strcmp(name, bs->device_name)) {
+ return bs;
+ }
+ }
+ return NULL;
+}
+
+BlockDriverState *bdrv_next(BlockDriverState *bs)
+{
+ if (!bs) {
+ return QTAILQ_FIRST(&bdrv_states);
+ }
+ return QTAILQ_NEXT(bs, list);
+}
+
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ it(opaque, bs);
+ }
+}
+
+const char *bdrv_get_device_name(BlockDriverState *bs)
+{
+ return bs->device_name;
+}
+
+int bdrv_get_flags(BlockDriverState *bs)
+{
+ return bs->open_flags;
+}
+
+int bdrv_flush_all(void)
+{
+ BlockDriverState *bs;
+ int result = 0;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ int ret = bdrv_flush(bs);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ }
+
+ return result;
+}
+
+int bdrv_has_zero_init_1(BlockDriverState *bs)
+{
+ return 1;
+}
+
+int bdrv_has_zero_init(BlockDriverState *bs)
+{
+ assert(bs->drv);
+
+ if (bs->drv->bdrv_has_zero_init) {
+ return bs->drv->bdrv_has_zero_init(bs);
+ }
+
+ /* safe default */
+ return 0;
+}
+
+typedef struct BdrvCoIsAllocatedData {
+ BlockDriverState *bs;
+ BlockDriverState *base;
+ int64_t sector_num;
+ int nb_sectors;
+ int *pnum;
+ int ret;
+ bool done;
+} BdrvCoIsAllocatedData;
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ int64_t n;
+
+ if (sector_num >= bs->total_sectors) {
+ *pnum = 0;
+ return 0;
+ }
+
+ n = bs->total_sectors - sector_num;
+ if (n < nb_sectors) {
+ nb_sectors = n;
+ }
+
+ if (!bs->drv->bdrv_co_is_allocated) {
+ *pnum = nb_sectors;
+ return 1;
+ }
+
+ return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
+}
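+
+/* Example (hypothetical values): on an image with total_sectors == 50,
+ * calling bdrv_co_is_allocated(bs, 40, 100, &pnum) first clamps nb_sectors
+ * to 10; a driver without .bdrv_co_is_allocated then reports the whole
+ * range as allocated, i.e. *pnum == 10 and a return value of 1.
+ */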
+
+/* Coroutine wrapper for bdrv_is_allocated() */
+static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
+{
+ BdrvCoIsAllocatedData *data = opaque;
+ BlockDriverState *bs = data->bs;
+
+ data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
+ data->pnum);
+ data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated().
+ *
+ * See bdrv_co_is_allocated() for details.
+ */
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum)
+{
+ Coroutine *co;
+ BdrvCoIsAllocatedData data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .pnum = pnum,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
+ return data.ret;
+}
+
+/*
+ * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in any image between
+ * BASE and TOP (inclusive). BASE can be NULL to check if the given
+ * sector is allocated in any image of the chain. Return false otherwise.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ */
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BlockDriverState *intermediate;
+ int ret, n = nb_sectors;
+
+ intermediate = top;
+ while (intermediate && intermediate != base) {
+ int pnum_inter;
+ ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
+ &pnum_inter);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ *pnum = pnum_inter;
+ return 1;
+ }
+
+ /*
+ * [sector_num, sector_num + nb_sectors) is unallocated on top but the
+ * intermediate image might have
+ *
+ * [sector_num + x, sector_num + nb_sectors) allocated.
+ */
+ if (n > pnum_inter &&
+ (intermediate == top ||
+ sector_num + pnum_inter < intermediate->total_sectors)) {
+ n = pnum_inter;
+ }
+
+ intermediate = intermediate->backing_hd;
+ }
+
+ *pnum = n;
+ return 0;
+}
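+
+/* Example (hypothetical chain): if the sector is unallocated in TOP but
+ * allocated in INTER1, the loop stops at INTER1 and returns 1 with *pnum
+ * taken from INTER1's answer; if no searched image has it allocated, the
+ * function returns 0 and *pnum holds the number of sectors known to be
+ * unallocated in all of them.
+ */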
+
+/* Coroutine wrapper for bdrv_is_allocated_above() */
+static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
+{
+ BdrvCoIsAllocatedData *data = opaque;
+ BlockDriverState *top = data->bs;
+ BlockDriverState *base = data->base;
+
+ data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num,
+ data->nb_sectors, data->pnum);
+ data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated_above().
+ *
+ * See bdrv_co_is_allocated_above() for details.
+ */
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ Coroutine *co;
+ BdrvCoIsAllocatedData data = {
+ .bs = top,
+ .base = base,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .pnum = pnum,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
+ return data.ret;
+}
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
+{
+ if (bs->backing_hd && bs->backing_hd->encrypted)
+ return bs->backing_file;
+ else if (bs->encrypted)
+ return bs->filename;
+ else
+ return NULL;
+}
+
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size)
+{
+ pstrcpy(filename, filename_size, bs->backing_file);
+}
+
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (!drv->bdrv_write_compressed)
+ return -ENOTSUP;
+ if (bdrv_check_request(bs, sector_num, nb_sectors))
+ return -EIO;
+
+ assert(!bs->dirty_bitmap);
+
+ return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+}
+
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (!drv->bdrv_get_info)
+ return -ENOTSUP;
+ memset(bdi, 0, sizeof(*bdi));
+ return drv->bdrv_get_info(bs, bdi);
+}
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = size,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_writev_vmstate(bs, &qiov, pos);
+}
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ } else if (drv->bdrv_save_vmstate) {
+ return drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (bs->file) {
+ return bdrv_writev_vmstate(bs->file, qiov, pos);
+ }
+
+ return -ENOTSUP;
+}
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (drv->bdrv_load_vmstate)
+ return drv->bdrv_load_vmstate(bs, buf, pos, size);
+ if (bs->file)
+ return bdrv_load_vmstate(bs->file, buf, pos, size);
+ return -ENOTSUP;
+}
+
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+{
+ if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
+ return;
+ }
+
+ bs->drv->bdrv_debug_event(bs, event);
+}
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
+ return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
+ }
+
+ return -ENOTSUP;
+}
+
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
+ return bs->drv->bdrv_debug_resume(bs, tag);
+ }
+
+ return -ENOTSUP;
+}
+
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
+ return bs->drv->bdrv_debug_is_suspended(bs, tag);
+ }
+
+ return false;
+}
+
+int bdrv_is_snapshot(BlockDriverState *bs)
+{
+ return !!(bs->open_flags & BDRV_O_SNAPSHOT);
+}
+
+/* backing_file can either be relative, or absolute, or a protocol. If it is
+ * relative, it must be relative to the chain. So, passing in bs->filename
+ * from a BDS as backing_file should not be done, as that may be relative to
+ * the CWD rather than the chain. */
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file)
+{
+ char *filename_full = NULL;
+ char *backing_file_full = NULL;
+ char *filename_tmp = NULL;
+ int is_protocol = 0;
+ BlockDriverState *curr_bs = NULL;
+ BlockDriverState *retval = NULL;
+
+ if (!bs || !bs->drv || !backing_file) {
+ return NULL;
+ }
+
+ filename_full = g_malloc(PATH_MAX);
+ backing_file_full = g_malloc(PATH_MAX);
+ filename_tmp = g_malloc(PATH_MAX);
+
+ is_protocol = path_has_protocol(backing_file);
+
+ for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+
+ /* If either of the filename paths is actually a protocol, then
+ * compare unmodified paths; otherwise make paths relative */
+ if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
+ if (strcmp(backing_file, curr_bs->backing_file) == 0) {
+ retval = curr_bs->backing_hd;
+ break;
+ }
+ } else {
+ /* If not an absolute filename path, make it relative to the current
+ * image's filename path */
+ path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+ backing_file);
+
+ /* We are going to compare absolute pathnames */
+ if (!realpath(filename_tmp, filename_full)) {
+ continue;
+ }
+
+ /* We need to make sure the backing filename we are comparing against
+ * is relative to the current image filename (or absolute) */
+ path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+ curr_bs->backing_file);
+
+ if (!realpath(filename_tmp, backing_file_full)) {
+ continue;
+ }
+
+ if (strcmp(backing_file_full, filename_full) == 0) {
+ retval = curr_bs->backing_hd;
+ break;
+ }
+ }
+ }
+
+ g_free(filename_full);
+ g_free(backing_file_full);
+ g_free(filename_tmp);
+ return retval;
+}
+
+int bdrv_get_backing_file_depth(BlockDriverState *bs)
+{
+ if (!bs->drv) {
+ return 0;
+ }
+
+ if (!bs->backing_hd) {
+ return 0;
+ }
+
+ return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
+}
+
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+ BlockDriverState *curr_bs = NULL;
+
+ if (!bs) {
+ return NULL;
+ }
+
+ curr_bs = bs;
+
+ while (curr_bs->backing_hd) {
+ curr_bs = curr_bs->backing_hd;
+ }
+ return curr_bs;
+}
+
+/**************************************************************/
+/* async I/Os */
+
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+
+ return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, false);
+}
+
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
+
+ return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, true);
+}
+
+
+typedef struct MultiwriteCB {
+ int error;
+ int num_requests;
+ int num_callbacks;
+ struct {
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+ QEMUIOVector *free_qiov;
+ } callbacks[];
+} MultiwriteCB;
+
+static void multiwrite_user_cb(MultiwriteCB *mcb)
+{
+ int i;
+
+ for (i = 0; i < mcb->num_callbacks; i++) {
+ mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
+ if (mcb->callbacks[i].free_qiov) {
+ qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
+ }
+ g_free(mcb->callbacks[i].free_qiov);
+ }
+}
+
+static void multiwrite_cb(void *opaque, int ret)
+{
+ MultiwriteCB *mcb = opaque;
+
+ trace_multiwrite_cb(mcb, ret);
+
+ if (ret < 0 && !mcb->error) {
+ mcb->error = ret;
+ }
+
+ mcb->num_requests--;
+ if (mcb->num_requests == 0) {
+ multiwrite_user_cb(mcb);
+ g_free(mcb);
+ }
+}
+
+static int multiwrite_req_compare(const void *a, const void *b)
+{
+ const BlockRequest *req1 = a, *req2 = b;
+
+ /*
+ * Note that we can't simply subtract req2->sector from req1->sector
+ * here as that could overflow the return value.
+ */
+ if (req1->sector > req2->sector) {
+ return 1;
+ } else if (req1->sector < req2->sector) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+/*
+ * Takes a bunch of requests and tries to merge them. Returns the number of
+ * requests that remain after merging.
+ */
+static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs, MultiwriteCB *mcb)
+{
+ int i, outidx;
+
+ // Sort requests by start sector
+ qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
+
+ // Check if adjacent requests touch the same clusters. If so, combine them;
+ // only exactly sequential or overlapping requests are merged, so there are
+ // no gaps to fill.
+ outidx = 0;
+ for (i = 1; i < num_reqs; i++) {
+ int merge = 0;
+ int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
+
+ // Handle exactly sequential writes and overlapping writes.
+ if (reqs[i].sector <= oldreq_last) {
+ merge = 1;
+ }
+
+ if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+ merge = 0;
+ }
+
+ if (merge) {
+ size_t size;
+ QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
+ qemu_iovec_init(qiov,
+ reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
+
+ // Add the first request to the merged one. If the requests are
+ // overlapping, drop the last sectors of the first request.
+ size = (reqs[i].sector - reqs[outidx].sector) << 9;
+ qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
+
+ // We should not need to add any zeros between the two requests
+ assert (reqs[i].sector <= oldreq_last);
+
+ // Add the second request
+ qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
+
+ reqs[outidx].nb_sectors = qiov->size >> 9;
+ reqs[outidx].qiov = qiov;
+
+ mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
+ } else {
+ outidx++;
+ reqs[outidx].sector = reqs[i].sector;
+ reqs[outidx].nb_sectors = reqs[i].nb_sectors;
+ reqs[outidx].qiov = reqs[i].qiov;
+ }
+ }
+
+ return outidx + 1;
+}
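+
+/* Worked example (hypothetical requests): two writes of 8 sectors each at
+ * sectors 0 and 8 are exactly sequential (oldreq_last == 8), so they are
+ * merged into a single 16-sector request whose qiov concatenates both
+ * buffers; a third request starting at sector 20 would leave a gap and
+ * therefore stay separate.
+ */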
+
+/*
+ * Submit multiple AIO write requests at once.
+ *
+ * On success, the function returns 0 and all requests in the reqs array have
+ * been submitted. In the error case this function returns -1, and some of
+ * the requests may or may not have been submitted yet. In particular, this
+ * means that the callback will be called for some of the requests and not
+ * for others. The caller must check the error field of each BlockRequest to
+ * know which callbacks to wait for (if error != 0, no callback will be called).
+ *
+ * The implementation may modify the contents of the reqs array, e.g. to merge
+ * requests. However, the fields opaque and error are left unmodified as they
+ * are used to signal failure for a single request to the caller.
+ */
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+{
+ MultiwriteCB *mcb;
+ int i;
+
+ /* don't submit writes if we don't have a medium */
+ if (bs->drv == NULL) {
+ for (i = 0; i < num_reqs; i++) {
+ reqs[i].error = -ENOMEDIUM;
+ }
+ return -1;
+ }
+
+ if (num_reqs == 0) {
+ return 0;
+ }
+
+ // Create MultiwriteCB structure
+ mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
+ mcb->num_requests = 0;
+ mcb->num_callbacks = num_reqs;
+
+ for (i = 0; i < num_reqs; i++) {
+ mcb->callbacks[i].cb = reqs[i].cb;
+ mcb->callbacks[i].opaque = reqs[i].opaque;
+ }
+
+ // Check for mergeable requests
+ num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+
+ trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+
+ /* Run the aio requests. */
+ mcb->num_requests = num_reqs;
+ for (i = 0; i < num_reqs; i++) {
+ bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
+ reqs[i].nb_sectors, multiwrite_cb, mcb);
+ }
+
+ return 0;
+}
+
+void bdrv_aio_cancel(BlockDriverAIOCB *acb)
+{
+ acb->aiocb_info->cancel(acb);
+}
+
+/* block I/O throttling */
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait)
+{
+ uint64_t bps_limit = 0;
+ uint64_t extension;
+ double bytes_limit, bytes_base, bytes_res;
+ double slice_time, wait_time;
+
+ if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+ bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+ } else if (bs->io_limits.bps[is_write]) {
+ bps_limit = bs->io_limits.bps[is_write];
+ } else {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ slice_time = bs->slice_end - bs->slice_start;
+ slice_time /= (NANOSECONDS_PER_SECOND);
+ bytes_limit = bps_limit * slice_time;
+ bytes_base = bs->slice_submitted.bytes[is_write];
+ if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+ bytes_base += bs->slice_submitted.bytes[!is_write];
+ }
+
+ /* bytes_base: the number of bytes already read/written in this slice,
+ * obtained from the accounting of previously submitted requests.
+ * bytes_res: the remaining bytes of data which need to be read/written.
+ * (bytes_base + bytes_res) / bps_limit: used to calculate the total time
+ * for completing the reading/writing of all this data.
+ */
+ bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+
+ if (bytes_base + bytes_res <= bytes_limit) {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ /* Calc approx time to dispatch, in seconds */
+ wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
+
+ /* When the I/O rate at runtime exceeds the limits,
+ * bs->slice_end needs to be extended so that the current accounting
+ * information is kept until the timer fires; the extension is rounded up
+ * to a whole number of slice periods, a value tuned empirically.
+ */
+ extension = wait_time * NANOSECONDS_PER_SECOND;
+ extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
+ BLOCK_IO_SLICE_TIME;
+ bs->slice_end += extension;
+ if (wait) {
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
+ }
+
+ return true;
+}
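+
+/* Worked example (hypothetical limits): with bps_limit == 1048576 (1 MiB/s)
+ * and a 0.1 s slice, bytes_limit is about 104858 bytes.  If 90000 bytes have
+ * already been accounted in this slice, a further 32-sector (16384 byte)
+ * request exceeds the budget, so the function extends bs->slice_end and
+ * returns true with *wait set to the estimated dispatch delay.
+ */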
+
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait)
+{
+ uint64_t iops_limit = 0;
+ double ios_limit, ios_base;
+ double slice_time, wait_time;
+
+ if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+ iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+ } else if (bs->io_limits.iops[is_write]) {
+ iops_limit = bs->io_limits.iops[is_write];
+ } else {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ slice_time = bs->slice_end - bs->slice_start;
+ slice_time /= (NANOSECONDS_PER_SECOND);
+ ios_limit = iops_limit * slice_time;
+ ios_base = bs->slice_submitted.ios[is_write];
+ if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+ ios_base += bs->slice_submitted.ios[!is_write];
+ }
+
+ if (ios_base + 1 <= ios_limit) {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ /* Calc approx time to dispatch, in seconds */
+ wait_time = (ios_base + 1) / iops_limit;
+ if (wait_time > elapsed_time) {
+ wait_time = wait_time - elapsed_time;
+ } else {
+ wait_time = 0;
+ }
+
+ /* Exceeded current slice, extend it by another slice time */
+ bs->slice_end += BLOCK_IO_SLICE_TIME;
+ if (wait) {
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
+ }
+
+ return true;
+}
+
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, int64_t *wait)
+{
+ int64_t now, max_wait;
+ uint64_t bps_wait = 0, iops_wait = 0;
+ double elapsed_time;
+ int bps_ret, iops_ret;
+
+ now = qemu_get_clock_ns(vm_clock);
+ if (now > bs->slice_end) {
+ bs->slice_start = now;
+ bs->slice_end = now + BLOCK_IO_SLICE_TIME;
+ memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
+ }
+
+ elapsed_time = now - bs->slice_start;
+ elapsed_time /= (NANOSECONDS_PER_SECOND);
+
+ bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
+ is_write, elapsed_time, &bps_wait);
+ iops_ret = bdrv_exceed_iops_limits(bs, is_write,
+ elapsed_time, &iops_wait);
+ if (bps_ret || iops_ret) {
+ max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
+ if (wait) {
+ *wait = max_wait;
+ }
+
+ now = qemu_get_clock_ns(vm_clock);
+ if (bs->slice_end < now + max_wait) {
+ bs->slice_end = now + max_wait;
+ }
+
+ return true;
+ }
+
+ if (wait) {
+ *wait = 0;
+ }
+
+ bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+ BDRV_SECTOR_SIZE;
+ bs->slice_submitted.ios[is_write]++;
+
+ return false;
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+typedef struct BlockDriverAIOCBSync {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ int ret;
+ /* vector translation state */
+ QEMUIOVector *qiov;
+ uint8_t *bounce;
+ int is_write;
+} BlockDriverAIOCBSync;
+
+static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
+{
+ BlockDriverAIOCBSync *acb =
+ container_of(blockacb, BlockDriverAIOCBSync, common);
+ qemu_bh_delete(acb->bh);
+ acb->bh = NULL;
+ qemu_aio_release(acb);
+}
+
+static const AIOCBInfo bdrv_em_aiocb_info = {
+ .aiocb_size = sizeof(BlockDriverAIOCBSync),
+ .cancel = bdrv_aio_cancel_em,
+};
+
+static void bdrv_aio_bh_cb(void *opaque)
+{
+ BlockDriverAIOCBSync *acb = opaque;
+
+ if (!acb->is_write)
+ qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ qemu_vfree(acb->bounce);
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_bh_delete(acb->bh);
+ acb->bh = NULL;
+ qemu_aio_release(acb);
+}
+
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ int is_write)
+
+{
+ BlockDriverAIOCBSync *acb;
+
+ acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
+ acb->is_write = is_write;
+ acb->qiov = qiov;
+ acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+
+ if (is_write) {
+ qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+ acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+ } else {
+ acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+ }
+
+ qemu_bh_schedule(acb->bh);
+
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+
+typedef struct BlockDriverAIOCBCoroutine {
+ BlockDriverAIOCB common;
+ BlockRequest req;
+ bool is_write;
+ bool *done;
+ QEMUBH* bh;
+} BlockDriverAIOCBCoroutine;
+
+static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
+{
+ BlockDriverAIOCBCoroutine *acb =
+ container_of(blockacb, BlockDriverAIOCBCoroutine, common);
+ bool done = false;
+
+ acb->done = &done;
+ while (!done) {
+ qemu_aio_wait();
+ }
+}
+
+static const AIOCBInfo bdrv_em_co_aiocb_info = {
+ .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
+ .cancel = bdrv_aio_co_cancel_em,
+};
+
+static void bdrv_co_em_bh(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+
+ acb->common.cb(acb->common.opaque, acb->req.error);
+
+ if (acb->done) {
+ *acb->done = true;
+ }
+
+ qemu_bh_delete(acb->bh);
+ qemu_aio_release(acb);
+}
+
+/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
+static void coroutine_fn bdrv_co_do_rw(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ if (!acb->is_write) {
+ acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
+ acb->req.nb_sectors, acb->req.qiov, 0);
+ } else {
+ acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
+ acb->req.nb_sectors, acb->req.qiov, 0);
+ }
+
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ bool is_write)
+{
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->req.sector = sector_num;
+ acb->req.nb_sectors = nb_sectors;
+ acb->req.qiov = qiov;
+ acb->is_write = is_write;
+ acb->done = NULL;
+
+ co = qemu_coroutine_create(bdrv_co_do_rw);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ acb->req.error = bdrv_co_flush(bs);
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_flush(bs, opaque);
+
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->done = NULL;
+
+ co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->req.sector = sector_num;
+ acb->req.nb_sectors = nb_sectors;
+ acb->done = NULL;
+ co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+void bdrv_init(void)
+{
+ module_call_init(MODULE_INIT_BLOCK);
+}
+
+void bdrv_init_with_whitelist(void)
+{
+ use_bdrv_whitelist = 1;
+ bdrv_init();
+}
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BlockDriverAIOCB *acb;
+
+ acb = g_slice_alloc(aiocb_info->aiocb_size);
+ acb->aiocb_info = aiocb_info;
+ acb->bs = bs;
+ acb->cb = cb;
+ acb->opaque = opaque;
+ return acb;
+}
+
+void qemu_aio_release(void *p)
+{
+ BlockDriverAIOCB *acb = p;
+ g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+}
+
+/**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *iov,
+ bool is_write)
+{
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BlockDriverAIOCB *acb;
+
+ if (is_write) {
+ acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ } else {
+ acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ }
+
+ trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
+ if (!acb) {
+ return -EIO;
+ }
+ qemu_coroutine_yield();
+
+ return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+
+ rwco->ret = bdrv_co_flush(rwco->bs);
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+ int ret;
+
+ if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ return 0;
+ }
+
+ /* Write back cached data to the OS even with cache=unsafe */
+ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+ if (bs->drv->bdrv_co_flush_to_os) {
+ ret = bs->drv->bdrv_co_flush_to_os(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* But don't actually force it to the disk with cache=unsafe */
+ if (bs->open_flags & BDRV_O_NO_FLUSH) {
+ goto flush_parent;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+ if (bs->drv->bdrv_co_flush_to_disk) {
+ ret = bs->drv->bdrv_co_flush_to_disk(bs);
+ } else if (bs->drv->bdrv_aio_flush) {
+ BlockDriverAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield();
+ ret = co.ret;
+ }
+ } else {
+ /*
+ * Some block drivers always operate in either writethrough or unsafe
+ * mode and therefore don't support bdrv_flush. Usually qemu doesn't
+ * know how the server works (because the behaviour is hardcoded or
+ * depends on server-side configuration), so we can't ensure that
+ * everything is safe on disk. Returning an error doesn't work because
+ * that would break guests even if the server operates in writethrough
+ * mode.
+ *
+ * Let's hope the user knows what he's doing.
+ */
+ ret = 0;
+ }
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
+ * in the case of cache=unsafe, so there are no useless flushes.
+ */
+flush_parent:
+ return bdrv_co_flush(bs->file);
+}
+
+void bdrv_invalidate_cache(BlockDriverState *bs)
+{
+ if (bs->drv && bs->drv->bdrv_invalidate_cache) {
+ bs->drv->bdrv_invalidate_cache(bs);
+ }
+}
+
+void bdrv_invalidate_cache_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bdrv_invalidate_cache(bs);
+ }
+}
+
+void bdrv_clear_incoming_migration_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+ }
+}
+
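+/*
+ * bdrv_flush() and bdrv_discard() share the same synchronous wrapper pattern:
+ * when already in coroutine context the entry function runs directly,
+ * otherwise a coroutine is created and qemu_aio_wait() is polled until
+ * rwco.ret is no longer NOT_DONE.
+ */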
+int bdrv_flush(BlockDriverState *bs)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .ret = NOT_DONE,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_flush_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_flush_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ return rwco.ret;
+}
+
+static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+
+ rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+}
+
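+/*
+ * Discard requests fall through a chain of driver hooks: bdrv_co_discard for
+ * coroutine-based drivers, bdrv_aio_discard bridged via
+ * bdrv_co_io_em_complete() otherwise, and a silent no-op when the driver
+ * offers neither or BDRV_O_UNMAP is not set.
+ */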
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ } else if (bs->read_only) {
+ return -EROFS;
+ }
+
+ if (bs->dirty_bitmap) {
+ bdrv_reset_dirty(bs, sector_num, nb_sectors);
+ }
+
+ /* Do nothing if disabled. */
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ return 0;
+ }
+
+ if (bs->drv->bdrv_co_discard) {
+ return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
+ } else if (bs->drv->bdrv_aio_discard) {
+ BlockDriverAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield();
+ return co.ret;
+ }
+ } else {
+ return 0;
+ }
+}
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .ret = NOT_DONE,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_discard_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_discard_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ return rwco.ret;
+}
+
+/**************************************************************/
+/* removable device support */
+
+/**
+ * Return TRUE if the media is present
+ */
+int bdrv_is_inserted(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (!drv)
+ return 0;
+ if (!drv->bdrv_is_inserted)
+ return 1;
+ return drv->bdrv_is_inserted(bs);
+}
+
+/**
+ * Return whether the media changed since the last call to this
+ * function, or -ENOTSUP if we don't know. Most drivers don't know.
+ */
+int bdrv_media_changed(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_media_changed) {
+ return drv->bdrv_media_changed(bs);
+ }
+ return -ENOTSUP;
+}
+
+/**
+ * If eject_flag is TRUE, eject the media. Otherwise, close the tray
+ */
+void bdrv_eject(BlockDriverState *bs, bool eject_flag)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_eject) {
+ drv->bdrv_eject(bs, eject_flag);
+ }
+
+ if (bs->device_name[0] != '\0') {
+ bdrv_emit_qmp_eject_event(bs, eject_flag);
+ }
+}
+
+/**
+ * Lock or unlock the media (if it is locked, the user won't be able
+ * to eject it manually).
+ */
+void bdrv_lock_medium(BlockDriverState *bs, bool locked)
+{
+ BlockDriver *drv = bs->drv;
+
+ trace_bdrv_lock_medium(bs, locked);
+
+ if (drv && drv->bdrv_lock_medium) {
+ drv->bdrv_lock_medium(bs, locked);
+ }
+}
+
+/* needed for generic scsi interface */
+
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_ioctl)
+ return drv->bdrv_ioctl(bs, req, buf);
+ return -ENOTSUP;
+}
+
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_aio_ioctl)
+ return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
+ return NULL;
+}
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
+{
+ bs->buffer_alignment = align;
+}
+
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+ return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
+}
+
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+ int i;
+
+ for (i = 0; i < qiov->niov; i++) {
+ if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
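+/*
+ * Worked example for the conversion below: with a granularity of 65536 bytes,
+ * granularity >> BDRV_SECTOR_BITS is 128 sectors and ffs(128) - 1 = 7, so the
+ * dirty bitmap tracks changes at a 128-sector (64 KB) granularity.
+ */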
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
+{
+ int64_t bitmap_size;
+
+ assert((granularity & (granularity - 1)) == 0);
+
+ if (granularity) {
+ granularity >>= BDRV_SECTOR_BITS;
+ assert(!bs->dirty_bitmap);
+ bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+ bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
+ } else {
+ if (bs->dirty_bitmap) {
+ hbitmap_free(bs->dirty_bitmap);
+ bs->dirty_bitmap = NULL;
+ }
+ }
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
+{
+ if (bs->dirty_bitmap) {
+ return hbitmap_get(bs->dirty_bitmap, sector);
+ } else {
+ return 0;
+ }
+}
+
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
+{
+ hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
+}
+
+int64_t bdrv_get_dirty_count(BlockDriverState *bs)
+{
+ if (bs->dirty_bitmap) {
+ return hbitmap_count(bs->dirty_bitmap);
+ } else {
+ return 0;
+ }
+}
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use)
+{
+ assert(bs->in_use != in_use);
+ bs->in_use = in_use;
+}
+
+int bdrv_in_use(BlockDriverState *bs)
+{
+ return bs->in_use;
+}
+
+void bdrv_iostatus_enable(BlockDriverState *bs)
+{
+ bs->iostatus_enabled = true;
+ bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
+{
+ return (bs->iostatus_enabled &&
+ (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+ bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
+ bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+void bdrv_iostatus_disable(BlockDriverState *bs)
+{
+ bs->iostatus_enabled = false;
+}
+
+void bdrv_iostatus_reset(BlockDriverState *bs)
+{
+ if (bdrv_iostatus_is_enabled(bs)) {
+ bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+ if (bs->job) {
+ block_job_iostatus_reset(bs->job);
+ }
+ }
+}
+
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
+{
+ assert(bdrv_iostatus_is_enabled(bs));
+ if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+ BLOCK_DEVICE_IO_STATUS_FAILED;
+ }
+}
+
+void
+bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
+ enum BlockAcctType type)
+{
+ assert(type < BDRV_MAX_IOTYPE);
+
+ cookie->bytes = bytes;
+ cookie->start_time_ns = get_clock();
+ cookie->type = type;
+}
+
+void
+bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
+{
+ assert(cookie->type < BDRV_MAX_IOTYPE);
+
+ bs->nr_bytes[cookie->type] += cookie->bytes;
+ bs->nr_ops[cookie->type]++;
+ bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
+}
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet)
+{
+ QEMUOptionParameter *param = NULL, *create_options = NULL;
+ QEMUOptionParameter *backing_fmt, *backing_file, *size;
+ BlockDriverState *bs = NULL;
+ BlockDriver *drv, *proto_drv;
+ BlockDriver *backing_drv = NULL;
+ int ret = 0;
+
+ /* Find driver and parse its options */
+ drv = bdrv_find_format(fmt);
+ if (!drv) {
+ error_setg(errp, "Unknown file format '%s'", fmt);
+ return;
+ }
+
+ proto_drv = bdrv_find_protocol(filename, true);
+ if (!proto_drv) {
+ error_setg(errp, "Unknown protocol '%s'", filename);
+ return;
+ }
+
+ create_options = append_option_parameters(create_options,
+ drv->create_options);
+ create_options = append_option_parameters(create_options,
+ proto_drv->create_options);
+
+ /* Create parameter list with default values */
+ param = parse_option_parameters("", create_options, param);
+
+ set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
+
+ /* Parse -o options */
+ if (options) {
+ param = parse_option_parameters(options, create_options, param);
+ if (param == NULL) {
+ error_setg(errp, "Invalid options for file format '%s'.", fmt);
+ goto out;
+ }
+ }
+
+ if (base_filename) {
+ if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
+ base_filename)) {
+ error_setg(errp, "Backing file not supported for file format '%s'",
+ fmt);
+ goto out;
+ }
+ }
+
+ if (base_fmt) {
+ if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
+ error_setg(errp, "Backing file format not supported for file "
+ "format '%s'", fmt);
+ goto out;
+ }
+ }
+
+ backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
+ if (backing_file && backing_file->value.s) {
+ if (!strcmp(filename, backing_file->value.s)) {
+ error_setg(errp, "Error: Trying to create an image with the "
+ "same filename as the backing file");
+ goto out;
+ }
+ }
+
+ backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
+ if (backing_fmt && backing_fmt->value.s) {
+ backing_drv = bdrv_find_format(backing_fmt->value.s);
+ if (!backing_drv) {
+ error_setg(errp, "Unknown backing file format '%s'",
+ backing_fmt->value.s);
+ goto out;
+ }
+ }
+
+ // The size for the image must always be specified, with one exception:
+ // If we are using a backing file, we can obtain the size from there
+ size = get_option_parameter(param, BLOCK_OPT_SIZE);
+ if (size && size->value.n == -1) {
+ if (backing_file && backing_file->value.s) {
+ uint64_t size;
+ char buf[32];
+ int back_flags;
+
+ /* backing files always opened read-only */
+ back_flags =
+ flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+ bs = bdrv_new("");
+
+ ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
+ backing_drv);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not open '%s'",
+ backing_file->value.s);
+ goto out;
+ }
+ bdrv_get_geometry(bs, &size);
+ size *= 512;
+
+ snprintf(buf, sizeof(buf), "%" PRId64, size);
+ set_option_parameter(param, BLOCK_OPT_SIZE, buf);
+ } else {
+ error_setg(errp, "Image creation needs a size parameter");
+ goto out;
+ }
+ }
+
+ if (!quiet) {
+ printf("Formatting '%s', fmt=%s ", filename, fmt);
+ print_option_parameters(param);
+ puts("");
+ }
+ ret = bdrv_create(drv, filename, param);
+ if (ret < 0) {
+ if (ret == -ENOTSUP) {
+ error_setg(errp,"Formatting or formatting option not supported for "
+ "file format '%s'", fmt);
+ } else if (ret == -EFBIG) {
+ const char *cluster_size_hint = "";
+ if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
+ cluster_size_hint = " (try using a larger cluster size)";
+ }
+ error_setg(errp, "The image size is too large for file format '%s'%s",
+ fmt, cluster_size_hint);
+ } else {
+ error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
+ strerror(-ret));
+ }
+ }
+
+out:
+ free_option_parameters(create_options);
+ free_option_parameters(param);
+
+ if (bs) {
+ bdrv_delete(bs);
+ }
+}
+
+AioContext *bdrv_get_aio_context(BlockDriverState *bs)
+{
+ /* Currently BlockDriverState always uses the main loop AioContext */
+ return qemu_get_aio_context();
+}
+
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier)
+{
+ notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
+}
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c
new file mode 100644
index 000000000..5239bd68f
--- /dev/null
+++ b/contrib/qemu/block/qcow.c
@@ -0,0 +1,914 @@
+/*
+ * Block driver for the QCOW format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include <zlib.h>
+#include "qemu/aes.h"
+#include "migration/migration.h"
+
+/**************************************************************/
+/* QEMU COW block driver with compression and encryption support */
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 1
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+#define QCOW_OFLAG_COMPRESSED (1LL << 63)
+
+typedef struct QCowHeader {
+ uint32_t magic;
+ uint32_t version;
+ uint64_t backing_file_offset;
+ uint32_t backing_file_size;
+ uint32_t mtime;
+ uint64_t size; /* in bytes */
+ uint8_t cluster_bits;
+ uint8_t l2_bits;
+ uint32_t crypt_method;
+ uint64_t l1_table_offset;
+} QCowHeader;
+
+#define L2_CACHE_SIZE 16
+
+typedef struct BDRVQcowState {
+ int cluster_bits;
+ int cluster_size;
+ int cluster_sectors;
+ int l2_bits;
+ int l2_size;
+ int l1_size;
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset;
+ uint64_t *l1_table;
+ uint64_t *l2_cache;
+ uint64_t l2_cache_offsets[L2_CACHE_SIZE];
+ uint32_t l2_cache_counts[L2_CACHE_SIZE];
+ uint8_t *cluster_cache;
+ uint8_t *cluster_data;
+ uint64_t cluster_cache_offset;
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ CoMutex lock;
+ Error *migration_blocker;
+} BDRVQcowState;
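+
+/*
+ * Address translation sketch: a guest offset is split into an L1 index
+ * (offset >> (l2_bits + cluster_bits)), an L2 index
+ * ((offset >> cluster_bits) & (l2_size - 1)) and an offset within the
+ * cluster. With the defaults qcow_create() uses for images without a backing
+ * file (cluster_bits = 12, l2_bits = 9), each L2 table covers
+ * 512 * 4 KB = 2 MB of guest data.
+ */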
+
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+
+static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const QCowHeader *cow_header = (const void *)buf;
+
+ if (buf_size >= sizeof(QCowHeader) &&
+ be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
+ be32_to_cpu(cow_header->version) == QCOW_VERSION)
+ return 100;
+ else
+ return 0;
+}
+
+static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ int len, i, shift, ret;
+ QCowHeader header;
+
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto fail;
+ }
+ be32_to_cpus(&header.magic);
+ be32_to_cpus(&header.version);
+ be64_to_cpus(&header.backing_file_offset);
+ be32_to_cpus(&header.backing_file_size);
+ be32_to_cpus(&header.mtime);
+ be64_to_cpus(&header.size);
+ be32_to_cpus(&header.crypt_method);
+ be64_to_cpus(&header.l1_table_offset);
+
+ if (header.magic != QCOW_MAGIC) {
+ ret = -EMEDIUMTYPE;
+ goto fail;
+ }
+ if (header.version != QCOW_VERSION) {
+ char version[64];
+ snprintf(version, sizeof(version), "QCOW version %d", header.version);
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "qcow", version);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ if (header.size <= 1 || header.cluster_bits < 9) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (header.crypt_method > QCOW_CRYPT_AES) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->crypt_method_header = header.crypt_method;
+ if (s->crypt_method_header) {
+ bs->encrypted = 1;
+ }
+ s->cluster_bits = header.cluster_bits;
+ s->cluster_size = 1 << s->cluster_bits;
+ s->cluster_sectors = 1 << (s->cluster_bits - 9);
+ s->l2_bits = header.l2_bits;
+ s->l2_size = 1 << s->l2_bits;
+ bs->total_sectors = header.size / 512;
+ s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
+
+ /* read the level 1 table */
+ shift = s->cluster_bits + s->l2_bits;
+ s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
+
+ s->l1_table_offset = header.l1_table_offset;
+ s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+
+ ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+ /* alloc L2 cache */
+ s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ s->cluster_cache = g_malloc(s->cluster_size);
+ s->cluster_data = g_malloc(s->cluster_size);
+ s->cluster_cache_offset = -1;
+
+ /* read the backing file name */
+ if (header.backing_file_offset != 0) {
+ len = header.backing_file_size;
+ if (len > 1023) {
+ len = 1023;
+ }
+ ret = bdrv_pread(bs->file, header.backing_file_offset,
+ bs->backing_file, len);
+ if (ret < 0) {
+ goto fail;
+ }
+ bs->backing_file[len] = '\0';
+ }
+
+ /* Disable migration when qcow images are used */
+ error_set(&s->migration_blocker,
+ QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+ "qcow", bs->device_name, "live migration");
+ migrate_add_blocker(s->migration_blocker);
+
+ qemu_co_mutex_init(&s->lock);
+ return 0;
+
+ fail:
+ g_free(s->l1_table);
+ g_free(s->l2_cache);
+ g_free(s->cluster_cache);
+ g_free(s->cluster_data);
+ return ret;
+}
+
+
+/* We have nothing to do for QCOW reopen, stubs just return
+ * success */
+static int qcow_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint8_t keybuf[16];
+ int len, i;
+
+ memset(keybuf, 0, 16);
+ len = strlen(key);
+ if (len > 16)
+ len = 16;
+ /* XXX: we could compress the chars to 7 bits to increase
+ entropy */
+ for(i = 0;i < len;i++) {
+ keybuf[i] = key[i];
+ }
+ s->crypt_method = s->crypt_method_header;
+
+ if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ return -1;
+ if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+ return -1;
+ return 0;
+}
+
+/* The crypt function is compatible with the linux cryptoloop
+ algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ supported */
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key)
+{
+ union {
+ uint64_t ll[2];
+ uint8_t b[16];
+ } ivec;
+ int i;
+
+ for(i = 0; i < nb_sectors; i++) {
+ ivec.ll[0] = cpu_to_le64(sector_num);
+ ivec.ll[1] = 0;
+ AES_cbc_encrypt(in_buf, out_buf, 512, key,
+ ivec.b, enc);
+ sector_num++;
+ in_buf += 512;
+ out_buf += 512;
+ }
+}
+
+/* 'allocate' is:
+ *
+ * 0 to not allocate.
+ *
+ * 1 to allocate a normal cluster (for sector indexes 'n_start' to
+ * 'n_end')
+ *
+ * 2 to allocate a compressed cluster of size
+ * 'compressed_size'. 'compressed_size' must be > 0 and <
+ * cluster_size
+ *
+ * return 0 if not allocated.
+ */
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+ uint64_t offset, int allocate,
+ int compressed_size,
+ int n_start, int n_end)
+{
+ BDRVQcowState *s = bs->opaque;
+ int min_index, i, j, l1_index, l2_index;
+ uint64_t l2_offset, *l2_table, cluster_offset, tmp;
+ uint32_t min_count;
+ int new_l2_table;
+
+ l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ l2_offset = s->l1_table[l1_index];
+ new_l2_table = 0;
+ if (!l2_offset) {
+ if (!allocate)
+ return 0;
+ /* allocate a new l2 entry */
+ l2_offset = bdrv_getlength(bs->file);
+ /* round to cluster size */
+ l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
+ /* update the L1 entry */
+ s->l1_table[l1_index] = l2_offset;
+ tmp = cpu_to_be64(l2_offset);
+ if (bdrv_pwrite_sync(bs->file,
+ s->l1_table_offset + l1_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
+ return 0;
+ new_l2_table = 1;
+ }
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (l2_offset == s->l2_cache_offsets[i]) {
+ /* increment the hit count */
+ if (++s->l2_cache_counts[i] == 0xffffffff) {
+ for(j = 0; j < L2_CACHE_SIZE; j++) {
+ s->l2_cache_counts[j] >>= 1;
+ }
+ }
+ l2_table = s->l2_cache + (i << s->l2_bits);
+ goto found;
+ }
+ }
+ /* not found: load a new entry in the least used one */
+ min_index = 0;
+ min_count = 0xffffffff;
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (s->l2_cache_counts[i] < min_count) {
+ min_count = s->l2_cache_counts[i];
+ min_index = i;
+ }
+ }
+ l2_table = s->l2_cache + (min_index << s->l2_bits);
+ if (new_l2_table) {
+ memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+ if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) < 0)
+ return 0;
+ } else {
+ if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ s->l2_size * sizeof(uint64_t))
+ return 0;
+ }
+ s->l2_cache_offsets[min_index] = l2_offset;
+ s->l2_cache_counts[min_index] = 1;
+ found:
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ if (!cluster_offset ||
+ ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
+ if (!allocate)
+ return 0;
+ /* allocate a new cluster */
+ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
+ (n_end - n_start) < s->cluster_sectors) {
+ /* if the cluster is already compressed, we must
+ decompress it in the case it is not completely
+ overwritten */
+ if (decompress_cluster(bs, cluster_offset) < 0)
+ return 0;
+ cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ /* write the cluster content */
+ if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ s->cluster_size)
+ return -1;
+ } else {
+ cluster_offset = bdrv_getlength(bs->file);
+ if (allocate == 1) {
+ /* round to cluster size */
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+ /* if encrypted, we must initialize the cluster
+ content which won't be written */
+ if (s->crypt_method &&
+ (n_end - n_start) < s->cluster_sectors) {
+ uint64_t start_sect;
+ start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
+ memset(s->cluster_data + 512, 0x00, 512);
+ for(i = 0; i < s->cluster_sectors; i++) {
+ if (i < n_start || i >= n_end) {
+ encrypt_sectors(s, start_sect + i,
+ s->cluster_data,
+ s->cluster_data + 512, 1, 1,
+ &s->aes_encrypt_key);
+ if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+ s->cluster_data, 512) != 512)
+ return -1;
+ }
+ }
+ }
+ } else if (allocate == 2) {
+ cluster_offset |= QCOW_OFLAG_COMPRESSED |
+ (uint64_t)compressed_size << (63 - s->cluster_bits);
+ }
+ }
+ /* update L2 table */
+ tmp = cpu_to_be64(cluster_offset);
+ l2_table[l2_index] = tmp;
+ if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
+ return 0;
+ }
+ return cluster_offset;
+}
+
+static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ qemu_co_mutex_lock(&s->lock);
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ qemu_co_mutex_unlock(&s->lock);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ *pnum = n;
+ return (cluster_offset != 0);
+}
+
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+ const uint8_t *buf, int buf_size)
+{
+ z_stream strm1, *strm = &strm1;
+ int ret, out_len;
+
+ memset(strm, 0, sizeof(*strm));
+
+ strm->next_in = (uint8_t *)buf;
+ strm->avail_in = buf_size;
+ strm->next_out = out_buf;
+ strm->avail_out = out_buf_size;
+
+ ret = inflateInit2(strm, -12);
+ if (ret != Z_OK)
+ return -1;
+ ret = inflate(strm, Z_FINISH);
+ out_len = strm->next_out - out_buf;
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
+ out_len != out_buf_size) {
+ inflateEnd(strm);
+ return -1;
+ }
+ inflateEnd(strm);
+ return 0;
+}
+
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, csize;
+ uint64_t coffset;
+
+ coffset = cluster_offset & s->cluster_offset_mask;
+ if (s->cluster_cache_offset != coffset) {
+ csize = cluster_offset >> (63 - s->cluster_bits);
+ csize &= (s->cluster_size - 1);
+ ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
+ if (ret != csize)
+ return -1;
+ if (decompress_buffer(s->cluster_cache, s->cluster_size,
+ s->cluster_data, csize) < 0) {
+ return -1;
+ }
+ s->cluster_cache_offset = coffset;
+ }
+ return 0;
+}
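+
+/*
+ * Compressed cluster descriptors keep the compressed byte size in the bits
+ * above bit (63 - cluster_bits): get_cluster_offset() encodes it as
+ * QCOW_OFLAG_COMPRESSED | compressed_size << (63 - cluster_bits), and
+ * decompress_cluster() recovers csize by shifting right by the same amount
+ * and masking with (cluster_size - 1).
+ */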
+
+static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ int ret = 0, n;
+ uint64_t cluster_offset;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (nb_sectors != 0) {
+ /* prepare next request */
+ cluster_offset = get_cluster_offset(bs, sector_num << 9,
+ 0, 0, 0, 0);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+
+ if (!cluster_offset) {
+ if (bs->backing_hd) {
+ /* read from the base image */
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ /* Note: in this case, no need to wait */
+ memset(buf, 0, 512 * n);
+ }
+ } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ /* add AIO support for compressed blocks ? */
+ if (decompress_cluster(bs, cluster_offset) < 0) {
+ goto fail;
+ }
+ memcpy(buf,
+ s->cluster_cache + index_in_cluster * 512, 512 * n);
+ } else {
+ if ((cluster_offset & 511) != 0) {
+ goto fail;
+ }
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
+ if (s->crypt_method) {
+ encrypt_sectors(s, sector_num, buf, buf,
+ n, 0,
+ &s->aes_decrypt_key);
+ }
+ }
+ ret = 0;
+
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+
+done:
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (qiov->niov > 1) {
+ qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
+ qemu_vfree(orig_buf);
+ }
+
+ return ret;
+
+fail:
+ ret = -EIO;
+ goto done;
+}
+
+static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ uint64_t cluster_offset;
+ const uint8_t *src_buf;
+ int ret = 0, n;
+ uint8_t *cluster_data = NULL;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (nb_sectors != 0) {
+
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+ index_in_cluster,
+ index_in_cluster + n);
+ if (!cluster_offset || (cluster_offset & 511) != 0) {
+ ret = -EIO;
+ break;
+ }
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = g_malloc0(s->cluster_size);
+ }
+ encrypt_sectors(s, sector_num, cluster_data, buf,
+ n, 1, &s->aes_encrypt_key);
+ src_buf = cluster_data;
+ } else {
+ src_buf = buf;
+ }
+
+ hd_iov.iov_base = (void *)src_buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
+ ret = 0;
+
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (qiov->niov > 1) {
+ qemu_vfree(orig_buf);
+ }
+ g_free(cluster_data);
+
+ return ret;
+}
+
+static void qcow_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ g_free(s->l1_table);
+ g_free(s->l2_cache);
+ g_free(s->cluster_cache);
+ g_free(s->cluster_data);
+
+ migrate_del_blocker(s->migration_blocker);
+ error_free(s->migration_blocker);
+}
+
+static int qcow_create(const char *filename, QEMUOptionParameter *options)
+{
+ int header_size, backing_filename_len, l1_size, shift, i;
+ QCowHeader header;
+ uint8_t *tmp;
+ int64_t total_size = 0;
+ const char *backing_file = NULL;
+ int flags = 0;
+ int ret;
+ BlockDriverState *qcow_bs;
+
+ /* Read out options */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ total_size = options->value.n / 512;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+ flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+ }
+ options++;
+ }
+
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_truncate(qcow_bs, 0);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(QCOW_VERSION);
+ header.size = cpu_to_be64(total_size * 512);
+ header_size = sizeof(header);
+ backing_filename_len = 0;
+ if (backing_file) {
+ if (strcmp(backing_file, "fat:")) {
+ header.backing_file_offset = cpu_to_be64(header_size);
+ backing_filename_len = strlen(backing_file);
+ header.backing_file_size = cpu_to_be32(backing_filename_len);
+ header_size += backing_filename_len;
+ } else {
+ /* special backing file for vvfat */
+ backing_file = NULL;
+ }
+ header.cluster_bits = 9; /* 512 byte cluster to avoid copying
+ unmodified sectors */
+ header.l2_bits = 12; /* 32 KB L2 tables */
+ } else {
+ header.cluster_bits = 12; /* 4 KB clusters */
+ header.l2_bits = 9; /* 4 KB L2 tables */
+ }
+ header_size = (header_size + 7) & ~7;
+ shift = header.cluster_bits + header.l2_bits;
+ l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+
+ header.l1_table_offset = cpu_to_be64(header_size);
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ /* write all the data */
+ ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ if (ret != sizeof(header)) {
+ goto exit;
+ }
+
+ if (backing_file) {
+ ret = bdrv_pwrite(qcow_bs, sizeof(header),
+ backing_file, backing_filename_len);
+ if (ret != backing_filename_len) {
+ goto exit;
+ }
+ }
+
+ tmp = g_malloc0(BDRV_SECTOR_SIZE);
+ for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
+ BDRV_SECTOR_SIZE); i++) {
+ ret = bdrv_pwrite(qcow_bs, header_size +
+ BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ if (ret != BDRV_SECTOR_SIZE) {
+ g_free(tmp);
+ goto exit;
+ }
+ }
+
+ g_free(tmp);
+ ret = 0;
+exit:
+ bdrv_delete(qcow_bs);
+ return ret;
+}
+
+static int qcow_make_empty(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+ int ret;
+
+ memset(s->l1_table, 0, l1_length);
+ if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+ l1_length) < 0)
+ return -1;
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ if (ret < 0)
+ return ret;
+
+ memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
+ memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
+
+ return 0;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+ tables to avoid losing bytes in alignment */
+static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ z_stream strm;
+ int ret, out_len;
+ uint8_t *out_buf;
+ uint64_t cluster_offset;
+
+ if (nb_sectors != s->cluster_sectors) {
+ ret = -EINVAL;
+
+ /* Zero-pad last write if image size is not cluster aligned */
+ if (sector_num + nb_sectors == bs->total_sectors &&
+ nb_sectors < s->cluster_sectors) {
+ uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
+ memset(pad_buf, 0, s->cluster_size);
+ memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
+ ret = qcow_write_compressed(bs, sector_num,
+ pad_buf, s->cluster_sectors);
+ qemu_vfree(pad_buf);
+ }
+ return ret;
+ }
+
+ out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+ /* best compression, small window, no zlib header */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -12,
+ 9, Z_DEFAULT_STRATEGY);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ strm.avail_in = s->cluster_size;
+ strm.next_in = (uint8_t *)buf;
+ strm.avail_out = s->cluster_size;
+ strm.next_out = out_buf;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret != Z_STREAM_END && ret != Z_OK) {
+ deflateEnd(&strm);
+ ret = -EINVAL;
+ goto fail;
+ }
+ out_len = strm.next_out - out_buf;
+
+ deflateEnd(&strm);
+
+ if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+ /* could not compress: write normal cluster */
+ ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+ out_len, 0, 0);
+ if (cluster_offset == 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ cluster_offset &= s->cluster_offset_mask;
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+fail:
+ g_free(out_buf);
+ return ret;
+}
+
+static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQcowState *s = bs->opaque;
+ bdi->cluster_size = s->cluster_size;
+ return 0;
+}
+
+
+static QEMUOptionParameter qcow_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = OPT_FLAG,
+ .help = "Encrypt the image"
+ },
+ { NULL }
+};
+
+static BlockDriver bdrv_qcow = {
+ .format_name = "qcow",
+ .instance_size = sizeof(BDRVQcowState),
+ .bdrv_probe = qcow_probe,
+ .bdrv_open = qcow_open,
+ .bdrv_close = qcow_close,
+ .bdrv_reopen_prepare = qcow_reopen_prepare,
+ .bdrv_create = qcow_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+
+ .bdrv_co_readv = qcow_co_readv,
+ .bdrv_co_writev = qcow_co_writev,
+ .bdrv_co_is_allocated = qcow_co_is_allocated,
+
+ .bdrv_set_key = qcow_set_key,
+ .bdrv_make_empty = qcow_make_empty,
+ .bdrv_write_compressed = qcow_write_compressed,
+ .bdrv_get_info = qcow_get_info,
+
+ .create_options = qcow_create_options,
+};
+
+static void bdrv_qcow_init(void)
+{
+ bdrv_register(&bdrv_qcow);
+}
+
+block_init(bdrv_qcow_init);
diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c
new file mode 100644
index 000000000..2f3114ecc
--- /dev/null
+++ b/contrib/qemu/block/qcow2-cache.c
@@ -0,0 +1,323 @@
+/*
+ * L2/refcount table cache for the QCOW2 format
+ *
+ * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/block_int.h"
+#include "qemu-common.h"
+#include "qcow2.h"
+#include "trace.h"
+
+typedef struct Qcow2CachedTable {
+ void* table;
+ int64_t offset;
+ bool dirty;
+ int cache_hits;
+ int ref;
+} Qcow2CachedTable;
+
+struct Qcow2Cache {
+ Qcow2CachedTable* entries;
+ struct Qcow2Cache* depends;
+ int size;
+ bool depends_on_flush;
+};
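+
+/*
+ * Usage sketch (illustrative): callers pair every qcow2_cache_get() with a
+ * qcow2_cache_put() and mark modified tables dirty before releasing them:
+ *
+ *     uint64_t *l2_table;
+ *     qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void **) &l2_table);
+ *     l2_table[l2_index] = cpu_to_be64(cluster_offset);
+ *     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ *     qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+ */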
+
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2Cache *c;
+ int i;
+
+ c = g_malloc0(sizeof(*c));
+ c->size = num_tables;
+ c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
+
+ for (i = 0; i < c->size; i++) {
+ c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
+ }
+
+ return c;
+}
+
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ assert(c->entries[i].ref == 0);
+ qemu_vfree(c->entries[i].table);
+ }
+
+ g_free(c->entries);
+ g_free(c);
+
+ return 0;
+}
+
+static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int ret;
+
+ ret = qcow2_cache_flush(bs, c->depends);
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->depends = NULL;
+ c->depends_on_flush = false;
+
+ return 0;
+}
+
+static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret = 0;
+
+ if (!c->entries[i].dirty || !c->entries[i].offset) {
+ return 0;
+ }
+
+ trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
+ if (c->depends) {
+ ret = qcow2_cache_flush_dependency(bs, c);
+ } else if (c->depends_on_flush) {
+ ret = bdrv_flush(bs->file);
+ if (ret >= 0) {
+ c->depends_on_flush = false;
+ }
+ }
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (c == s->refcount_block_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+ } else if (c == s->l2_table_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+ }
+
+ ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
+ s->cluster_size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->entries[i].dirty = false;
+
+ return 0;
+}
+
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+ BDRVQcowState *s = bs->opaque;
+ int result = 0;
+ int ret;
+ int i;
+
+ trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
+ for (i = 0; i < c->size; i++) {
+ ret = qcow2_cache_entry_flush(bs, c, i);
+ if (ret < 0 && result != -ENOSPC) {
+ result = ret;
+ }
+ }
+
+ if (result == 0) {
+ ret = bdrv_flush(bs->file);
+ if (ret < 0) {
+ result = ret;
+ }
+ }
+
+ return result;
+}
+
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+ Qcow2Cache *dependency)
+{
+ int ret;
+
+ if (dependency->depends) {
+ ret = qcow2_cache_flush_dependency(bs, dependency);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (c->depends && (c->depends != dependency)) {
+ ret = qcow2_cache_flush_dependency(bs, c);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ c->depends = dependency;
+ return 0;
+}
+
+void qcow2_cache_depends_on_flush(Qcow2Cache *c)
+{
+ c->depends_on_flush = true;
+}
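+
+/*
+ * Dependencies order metadata writeback: qcow2_cache_entry_flush() first
+ * flushes the cache recorded in c->depends (or bdrv_flush()es the image file
+ * when depends_on_flush is set) before writing its own entries, so callers
+ * can ensure one kind of table reaches the disk before another.
+ */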
+
+static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
+{
+ int i;
+ int min_count = INT_MAX;
+ int min_index = -1;
+
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].ref) {
+ continue;
+ }
+
+ if (c->entries[i].cache_hits < min_count) {
+ min_index = i;
+ min_count = c->entries[i].cache_hits;
+ }
+
+ /* Give newer hits priority */
+ /* TODO Check how to optimize the replacement strategy */
+ c->entries[i].cache_hits /= 2;
+ }
+
+ if (min_index == -1) {
+ /* This can't happen in current synchronous code, but leave the check
+ * here as a reminder for whoever starts using AIO with the cache */
+ abort();
+ }
+ return min_index;
+}
+
+static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
+ uint64_t offset, void **table, bool read_from_disk)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+ int ret;
+
+ trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+ offset, read_from_disk);
+
+ /* Check if the table is already cached */
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].offset == offset) {
+ goto found;
+ }
+ }
+
+ /* If not, write a table back and replace it */
+ i = qcow2_cache_find_entry_to_replace(c);
+ trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+ if (i < 0) {
+ return i;
+ }
+
+ ret = qcow2_cache_entry_flush(bs, c, i);
+ if (ret < 0) {
+ return ret;
+ }
+
+ trace_qcow2_cache_get_read(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+ c->entries[i].offset = 0;
+ if (read_from_disk) {
+ if (c == s->l2_table_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+ }
+
+ ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* Give the table some hits to start with so that it won't be replaced
+ * immediately. The number 32 is completely arbitrary. */
+ c->entries[i].cache_hits = 32;
+ c->entries[i].offset = offset;
+
+ /* And return the right table */
+found:
+ c->entries[i].cache_hits++;
+ c->entries[i].ref++;
+ *table = c->entries[i].table;
+
+ trace_qcow2_cache_get_done(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
+ return 0;
+}
+
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table)
+{
+ return qcow2_cache_do_get(bs, c, offset, table, true);
+}
+
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table)
+{
+ return qcow2_cache_do_get(bs, c, offset, table, false);
+}
+
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].table == *table) {
+ goto found;
+ }
+ }
+ return -ENOENT;
+
+found:
+ c->entries[i].ref--;
+ *table = NULL;
+
+ assert(c->entries[i].ref >= 0);
+ return 0;
+}
+
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].table == table) {
+ goto found;
+ }
+ }
+ abort();
+
+found:
+ c->entries[i].dirty = true;
+}
diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c
new file mode 100644
index 000000000..cca76d4fc
--- /dev/null
+++ b/contrib/qemu/block/qcow2-cluster.c
@@ -0,0 +1,1478 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <zlib.h>
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+#include "trace.h"
+
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+ bool exact_size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int new_l1_size2, ret, i;
+ uint64_t *new_l1_table;
+ int64_t new_l1_table_offset, new_l1_size;
+ uint8_t data[12];
+
+ if (min_size <= s->l1_size)
+ return 0;
+
+ if (exact_size) {
+ new_l1_size = min_size;
+ } else {
+ /* Bump size up to reduce the number of times we have to grow */
+ new_l1_size = s->l1_size;
+ if (new_l1_size == 0) {
+ new_l1_size = 1;
+ }
+ while (min_size > new_l1_size) {
+ new_l1_size = (new_l1_size * 3 + 1) / 2;
+ }
+ }
+
+ if (new_l1_size > INT_MAX) {
+ return -EFBIG;
+ }
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
+ s->l1_size, new_l1_size);
+#endif
+
+ new_l1_size2 = sizeof(uint64_t) * new_l1_size;
+ new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
+ memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
+
+ /* write new table (align to cluster) */
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
+ new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
+ if (new_l1_table_offset < 0) {
+ g_free(new_l1_table);
+ return new_l1_table_offset;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
+ for(i = 0; i < s->l1_size; i++)
+ new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
+ ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+ if (ret < 0)
+ goto fail;
+ for(i = 0; i < s->l1_size; i++)
+ new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
+
+ /* set new table */
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
+ cpu_to_be32w((uint32_t*)data, new_l1_size);
+ cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+ if (ret < 0) {
+ goto fail;
+ }
+ g_free(s->l1_table);
+ qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ s->l1_table_offset = new_l1_table_offset;
+ s->l1_table = new_l1_table;
+ s->l1_size = new_l1_size;
+ return 0;
+ fail:
+ g_free(new_l1_table);
+ qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
+ QCOW2_DISCARD_OTHER);
+ return ret;
+}
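+
+/*
+ * Note the update order above: the new L1 table is written and synced at its
+ * new location, the header's l1_size and l1_table_offset fields are then
+ * updated with bdrv_pwrite_sync(), and only afterwards are the old table's
+ * clusters freed, so an interruption in between should at worst leak the
+ * newly allocated clusters rather than corrupt the L1 table.
+ */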
+
+/*
+ * l2_load
+ *
+ * Loads an L2 table into memory. If the table is in the cache, the cache
+ * is used; otherwise the L2 table is loaded from the image file.
+ *
+ * Returns a pointer to the L2 table on success, or NULL if the read from
+ * the image file failed.
+ */
+
+static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
+ uint64_t **l2_table)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
+
+ return ret;
+}
+
+/*
+ * Writes one sector of the L1 table to the disk (can't update single entries
+ * and we really don't want bdrv_pwrite to perform a read-modify-write)
+ */
+#define L1_ENTRIES_PER_SECTOR (512 / 8)
+static int write_l1_entry(BlockDriverState *bs, int l1_index)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t buf[L1_ENTRIES_PER_SECTOR];
+ int l1_start_index;
+ int i, ret;
+
+ l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
+ for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
+ buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
+ buf, sizeof(buf));
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * l2_allocate
+ *
+ * Allocate a new l2 entry in the file. If the l1 entry at l1_index already
+ * points to an L2 table (i.e. we are doing a copy on write for the L2
+ * table), copy the contents of the old L2 table into the newly allocated one.
+ * Otherwise the new table is initialized with zeros.
+ *
+ */
+
+static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t old_l2_offset;
+ uint64_t *l2_table;
+ int64_t l2_offset;
+ int ret;
+
+ old_l2_offset = s->l1_table[l1_index];
+
+ trace_qcow2_l2_allocate(bs, l1_index);
+
+ /* allocate a new l2 entry */
+
+ l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
+ if (l2_offset < 0) {
+ return l2_offset;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* allocate a new entry in the l2 cache */
+
+ trace_qcow2_l2_allocate_get_empty(bs, l1_index);
+ ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ l2_table = *table;
+
+ if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
+ /* if there was no old l2 table, clear the new table */
+ memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+ } else {
+ uint64_t* old_table;
+
+ /* if there was an old l2 table, read it from the disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+ ret = qcow2_cache_get(bs, s->l2_table_cache,
+ old_l2_offset & L1E_OFFSET_MASK,
+ (void**) &old_table);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ memcpy(l2_table, old_table, s->cluster_size);
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ /* write the l2 table to the file */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+
+ trace_qcow2_l2_allocate_write_l2(bs, l1_index);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* update the L1 entry */
+ trace_qcow2_l2_allocate_write_l1(bs, l1_index);
+ s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
+ ret = write_l1_entry(bs, l1_index);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ *table = l2_table;
+ trace_qcow2_l2_allocate_done(bs, l1_index, 0);
+ return 0;
+
+fail:
+ trace_qcow2_l2_allocate_done(bs, l1_index, ret);
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+ s->l1_table[l1_index] = old_l2_offset;
+ return ret;
+}
+
+/*
+ * Checks how many clusters in a given L2 table are contiguous in the image
+ * file. As soon as one of the flags in the bitmask stop_flags changes compared
+ * to the first cluster, the search is stopped and the cluster is not counted
+ * as contiguous. (This allows it, for example, to stop at the first compressed
+ * cluster, which may require different handling.)
+ */
+static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+ uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
+{
+ int i;
+ uint64_t mask = stop_flags | L2E_OFFSET_MASK;
+ uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
+
+ if (!offset)
+ return 0;
+
+ for (i = start; i < start + nb_clusters; i++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
+ if (offset + (uint64_t) i * cluster_size != l2_entry) {
+ break;
+ }
+ }
+
+ return (i - start);
+}
+
+static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+{
+ int i;
+
+ for (i = 0; i < nb_clusters; i++) {
+ int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
+
+ if (type != QCOW2_CLUSTER_UNALLOCATED) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+/* The crypt function is compatible with the linux cryptoloop
+ algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ supported */
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key)
+{
+ union {
+ uint64_t ll[2];
+ uint8_t b[16];
+ } ivec;
+ int i;
+
+ for(i = 0; i < nb_sectors; i++) {
+ ivec.ll[0] = cpu_to_le64(sector_num);
+ ivec.ll[1] = 0;
+ AES_cbc_encrypt(in_buf, out_buf, 512, key,
+ ivec.b, enc);
+ sector_num++;
+ in_buf += 512;
+ out_buf += 512;
+ }
+}
+
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+ uint64_t start_sect,
+ uint64_t cluster_offset,
+ int n_start, int n_end)
+{
+ BDRVQcowState *s = bs->opaque;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ int n, ret;
+
+ /*
+ * If this is the last cluster and it is only partially used, we must only
+ * copy until the end of the image, or bdrv_check_request will fail for the
+ * bdrv_read/write calls below.
+ */
+ if (start_sect + n_end > bs->total_sectors) {
+ n_end = bs->total_sectors - start_sect;
+ }
+
+ n = n_end - n_start;
+ if (n <= 0) {
+ return 0;
+ }
+
+ iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+
+ /* Call .bdrv_co_readv() directly instead of using the public block-layer
+ * interface. This avoids double I/O throttling and request tracking,
+ * which can lead to deadlock when block layer copy-on-read is enabled.
+ */
+ ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (s->crypt_method) {
+ qcow2_encrypt_sectors(s, start_sect + n_start,
+ iov.iov_base, iov.iov_base, n, 1,
+ &s->aes_encrypt_key);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
+ ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
+
+/*
+ * get_cluster_offset
+ *
+ * For a given offset of the disk image, find the cluster offset in
+ * qcow2 file. The offset is stored in *cluster_offset.
+ *
+ * on entry, *num is the number of contiguous sectors we'd like to
+ * access following offset.
+ *
+ * on exit, *num is the number of contiguous sectors we can read.
+ *
+ * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
+ * cases.
+ */
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int l2_index;
+ uint64_t l1_index, l2_offset, *l2_table;
+ int l1_bits, c;
+ unsigned int index_in_cluster, nb_clusters;
+ uint64_t nb_available, nb_needed;
+ int ret;
+
+ index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
+ nb_needed = *num + index_in_cluster;
+
+ l1_bits = s->l2_bits + s->cluster_bits;
+
+ /* compute how many bytes there are between the offset and
+ * the end of the l1 entry
+ */
+
+ nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
+
+ /* compute the number of available sectors */
+
+ nb_available = (nb_available >> 9) + index_in_cluster;
+
+ if (nb_needed > nb_available) {
+ nb_needed = nb_available;
+ }
+
+ *cluster_offset = 0;
+
+ /* seek the l2 offset in the l1 table */
+
+ l1_index = offset >> l1_bits;
+ if (l1_index >= s->l1_size) {
+ ret = QCOW2_CLUSTER_UNALLOCATED;
+ goto out;
+ }
+
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+ if (!l2_offset) {
+ ret = QCOW2_CLUSTER_UNALLOCATED;
+ goto out;
+ }
+
+ /* load the l2 table in memory */
+
+ ret = l2_load(bs, l2_offset, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* find the cluster offset for the given disk offset */
+
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ *cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ nb_clusters = size_to_clusters(s, nb_needed << 9);
+
+ ret = qcow2_get_cluster_type(*cluster_offset);
+ switch (ret) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters can only be processed one by one */
+ c = 1;
+ *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+ break;
+ case QCOW2_CLUSTER_ZERO:
+ if (s->qcow_version < 3) {
+ return -EIO;
+ }
+ c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ *cluster_offset = 0;
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ /* how many empty clusters ? */
+ c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+ *cluster_offset = 0;
+ break;
+ case QCOW2_CLUSTER_NORMAL:
+ /* how many allocated clusters ? */
+ c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ *cluster_offset &= L2E_OFFSET_MASK;
+ break;
+ default:
+ abort();
+ }
+
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+
+ nb_available = (c * s->cluster_sectors);
+
+out:
+ if (nb_available > nb_needed)
+ nb_available = nb_needed;
+
+ *num = nb_available - index_in_cluster;
+
+ return ret;
+}
+
+/*
+ * get_cluster_table
+ *
+ * For a given disk offset, load (and allocate if needed)
+ * the L2 table.
+ *
+ * The L2 table (via *new_l2_table) and the cluster index within it
+ * (via *new_l2_index) are returned to the caller.
+ *
+ * Returns 0 on success, -errno in failure cases
+ */
+static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
+ uint64_t **new_l2_table,
+ int *new_l2_index)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int l2_index;
+ uint64_t l1_index, l2_offset;
+ uint64_t *l2_table = NULL;
+ int ret;
+
+ /* seek the l2 offset in the l1 table */
+
+ l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ if (l1_index >= s->l1_size) {
+ ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ assert(l1_index < s->l1_size);
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+
+ /* seek the l2 table of the given l2 offset */
+
+ if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
+ /* load the l2 table in memory */
+ ret = l2_load(bs, l2_offset, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+ } else {
+ /* First allocate a new L2 table (and do COW if needed) */
+ ret = l2_allocate(bs, l1_index, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Then decrease the refcount of the old table */
+ if (l2_offset) {
+ qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+
+ /* find the cluster offset for the given disk offset */
+
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+
+ *new_l2_table = l2_table;
+ *new_l2_index = l2_index;
+
+ return 0;
+}
+
+/*
+ * alloc_compressed_cluster_offset
+ *
+ * For a given offset of the disk image, return the cluster offset in
+ * the qcow2 file.
+ *
+ * If the offset is not found, allocate a new compressed cluster.
+ *
+ * Returns the cluster offset if successful, or 0 otherwise.
+ *
+ */
+
+uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+ uint64_t offset,
+ int compressed_size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index, ret;
+ uint64_t *l2_table;
+ int64_t cluster_offset;
+ int nb_csectors;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return 0;
+ }
+
+ /* Compression can't overwrite anything. Fail if the cluster was already
+ * allocated. */
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ if (cluster_offset & L2E_OFFSET_MASK) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ return 0;
+ }
+
+ cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
+ if (cluster_offset < 0) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ return 0;
+ }
+
+ nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
+ (cluster_offset >> 9);
+
+ cluster_offset |= QCOW_OFLAG_COMPRESSED |
+ ((uint64_t)nb_csectors << s->csize_shift);
+
+ /* update L2 table */
+
+ /* compressed clusters never have the copied flag */
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ l2_table[l2_index] = cpu_to_be64(cluster_offset);
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return 0;
+ }
+
+ return cluster_offset;
+}
+
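+/*
+ * Layout of the COW regions handled by perform_cow(), as a sketch only:
+ *
+ *   |<-- m->cow_start -->|<------ guest write ------>|<-- m->cow_end -->|
+ *   start of first allocated cluster        end of last allocated cluster
+ *
+ * Both regions are filled from the old data (or the backing file) by
+ * copy_sectors() before the L2 table is pointed at the new clusters.
+ */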
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ if (r->nb_sectors == 0) {
+ return 0;
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+ ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
+ r->offset / BDRV_SECTOR_SIZE,
+ r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+ qemu_co_mutex_lock(&s->lock);
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * Before we update the L2 table to actually point to the new cluster, we
+ * need to be sure that the refcounts have been increased and COW was
+ * handled.
+ */
+ qcow2_cache_depends_on_flush(s->l2_table_cache);
+
+ return 0;
+}
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, j = 0, l2_index, ret;
+ uint64_t *old_cluster, *l2_table;
+ uint64_t cluster_offset = m->alloc_offset;
+
+ trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+ assert(m->nb_clusters > 0);
+
+ old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+
+ /* copy content of unmodified sectors */
+ ret = perform_cow(bs, m, &m->cow_start);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = perform_cow(bs, m, &m->cow_end);
+ if (ret < 0) {
+ goto err;
+ }
+
+ /* Update L2 table. */
+ if (s->use_lazy_refcounts) {
+ qcow2_mark_dirty(bs);
+ }
+ if (qcow2_need_accurate_refcounts(s)) {
+ qcow2_cache_set_dependency(bs, s->l2_table_cache,
+ s->refcount_block_cache);
+ }
+
+ ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ goto err;
+ }
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+
+ for (i = 0; i < m->nb_clusters; i++) {
+ /* If two concurrent writes happen to the same unallocated cluster,
+ * each write allocates a separate cluster and writes data concurrently.
+ * The first one to complete updates the L2 table with a pointer to its
+ * cluster; the second one then has to do RMW (which is done above by
+ * copy_sectors()), update the L2 table with its own cluster pointer,
+ * and free the old cluster. This is what this loop does. */
+ if (l2_table[l2_index + i] != 0) {
+ old_cluster[j++] = l2_table[l2_index + i];
+ }
+
+ l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+ (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
+ }
+
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ goto err;
+ }
+
+ /*
+ * If this was a COW, we need to decrease the refcount of the old cluster.
+ * Also flush bs->file to get the right order for L2 and refcount update.
+ *
+ * Don't discard clusters that reach a refcount of 0 (e.g. compressed
+ * clusters), the next write will reuse them anyway.
+ */
+ if (j != 0) {
+ for (i = 0; i < j; i++) {
+ qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
+ QCOW2_DISCARD_NEVER);
+ }
+ }
+
+ ret = 0;
+err:
+ g_free(old_cluster);
+ return ret;
+}
+
+/*
+ * Returns the number of contiguous clusters that can be used for an allocating
+ * write, but require COW to be performed (this includes yet unallocated space,
+ * which must be copied from the backing file).
+ */
+static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+ uint64_t *l2_table, int l2_index)
+{
+ int i;
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+ int cluster_type = qcow2_get_cluster_type(l2_entry);
+
+ switch(cluster_type) {
+ case QCOW2_CLUSTER_NORMAL:
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ goto out;
+ }
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ case QCOW2_CLUSTER_COMPRESSED:
+ case QCOW2_CLUSTER_ZERO:
+ break;
+ default:
+ abort();
+ }
+ }
+
+out:
+ assert(i <= nb_clusters);
+ return i;
+}
+
+/*
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
+ *
+ * Returns:
+ * 0 if there was no dependency. *cur_bytes indicates the number of
+ * bytes from guest_offset that can be read before the next
+ * dependency must be processed (or the request is complete)
+ *
+ * -EAGAIN if we had to wait for another request, previously gathered
+ * information on cluster allocation may be invalid now. The caller
+ * must start over anyway, so consider *cur_bytes undefined.
+ */
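+/*
+ * Illustration of the shortening logic (sketch only, byte granularity):
+ *
+ *      start                start + bytes
+ *        |---- new request ----|
+ *                 |------ in-flight allocation ------|
+ *                 ^ old_start
+ *
+ * The request is cut short at old_start. If start already lies inside the
+ * in-flight allocation, bytes becomes 0 and the caller either stops
+ * gathering clusters or waits on dependent_requests.
+ */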
+static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *cur_bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowL2Meta *old_alloc;
+ uint64_t bytes = *cur_bytes;
+
+ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
+
+ uint64_t start = guest_offset;
+ uint64_t end = start + bytes;
+ uint64_t old_start = l2meta_cow_start(old_alloc);
+ uint64_t old_end = l2meta_cow_end(old_alloc);
+
+ if (end <= old_start || start >= old_end) {
+ /* No intersection */
+ } else {
+ if (start < old_start) {
+ /* Stop at the start of a running allocation */
+ bytes = old_start - start;
+ } else {
+ bytes = 0;
+ }
+
+ /* Stop if an l2meta already exists. After yielding, it wouldn't
+ * be valid any more, so we'd have to clean up the old L2Metas
+ * and deal with requests depending on them before starting to
+ * gather new ones. Not worth the trouble. */
+ if (bytes == 0 && *m) {
+ *cur_bytes = 0;
+ return 0;
+ }
+
+ if (bytes == 0) {
+ /* Wait for the dependency to complete. We need to recheck
+ * the free/allocated clusters when we continue. */
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_queue_wait(&old_alloc->dependent_requests);
+ qemu_co_mutex_lock(&s->lock);
+ return -EAGAIN;
+ }
+ }
+ }
+
+ /* Make sure that existing clusters and new allocations are only used up to
+ * the next dependency if we shortened the request above */
+ *cur_bytes = bytes;
+
+ return 0;
+}
+
+/*
+ * Checks how many clusters that are already allocated and don't require a
+ * copy on write exist at the given guest_offset (up to *bytes). If
+ * *host_offset is not zero, only physically contiguous clusters beginning at
+ * this host offset are counted.
+ *
+ * Note that guest_offset may not be cluster aligned. In this case, the
+ * returned *host_offset points to the exact byte referenced by guest_offset
+ * and therefore isn't cluster aligned either.
+ *
+ * Returns:
+ * 0: if no allocated clusters are available at the given offset.
+ * *bytes is normally unchanged. It is set to 0 if the cluster
+ * is allocated and doesn't need COW, but doesn't have the right
+ * physical offset.
+ *
+ * 1: if allocated clusters that don't require a COW are available at
+ * the requested offset. *bytes may have decreased and describes
+ * the length of the area that can be written to.
+ *
+ * -errno: in error cases
+ */
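+/*
+ * Background note: QCOW_OFLAG_COPIED in an L2 entry indicates that the
+ * cluster's refcount is exactly 1, so it may be overwritten in place
+ * without a copy on write.
+ */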
+static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index;
+ uint64_t cluster_offset;
+ uint64_t *l2_table;
+ unsigned int nb_clusters;
+ unsigned int keep_clusters;
+ int ret, pret;
+
+ trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
+ *bytes);
+
+ assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
+ == offset_into_cluster(s, *host_offset));
+
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+ nb_clusters =
+ size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
+
+ l2_index = offset_to_l2_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+ /* Check how many clusters are already allocated and don't need COW */
+ if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
+ && (cluster_offset & QCOW_OFLAG_COPIED))
+ {
+ /* If a specific host_offset is required, check it */
+ bool offset_matches =
+ (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
+
+ if (*host_offset != 0 && !offset_matches) {
+ *bytes = 0;
+ ret = 0;
+ goto out;
+ }
+
+ /* We keep all QCOW_OFLAG_COPIED clusters */
+ keep_clusters =
+ count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
+ assert(keep_clusters <= nb_clusters);
+
+ *bytes = MIN(*bytes,
+ keep_clusters * s->cluster_size
+ - offset_into_cluster(s, guest_offset));
+
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+
+ /* Cleanup */
+out:
+ pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (pret < 0) {
+ return pret;
+ }
+
+ /* Only return a host offset if we actually made progress. Otherwise we
+ * would make requirements for handle_alloc() that it can't fulfill */
+ if (ret) {
+ *host_offset = (cluster_offset & L2E_OFFSET_MASK)
+ + offset_into_cluster(s, guest_offset);
+ }
+
+ return ret;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, unsigned int *nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+ *host_offset, *nb_clusters);
+
+ /* Allocate new clusters */
+ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+ if (*host_offset == 0) {
+ int64_t cluster_offset =
+ qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+ if (cluster_offset < 0) {
+ return cluster_offset;
+ }
+ *host_offset = cluster_offset;
+ return 0;
+ } else {
+ int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+ if (ret < 0) {
+ return ret;
+ }
+ *nb_clusters = ret;
+ return 0;
+ }
+}
+
+/*
+ * Allocates new clusters for an area that either is yet unallocated or needs a
+ * copy on write. If *host_offset is non-zero, clusters are only allocated if
+ * the new allocation can match the specified host offset.
+ *
+ * Note that guest_offset may not be cluster aligned. In this case, the
+ * returned *host_offset points to the exact byte referenced by guest_offset
+ * and therefore isn't cluster aligned either.
+ *
+ * Returns:
+ * 0: if no clusters could be allocated. *bytes is set to 0,
+ * *host_offset is left unchanged.
+ *
+ * 1: if new clusters were allocated. *bytes may be decreased if the
+ * new allocation doesn't cover all of the requested area.
+ * *host_offset is updated to contain the host offset of the first
+ * newly allocated cluster.
+ *
+ * -errno: in error cases
+ */
+static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index;
+ uint64_t *l2_table;
+ uint64_t entry;
+ unsigned int nb_clusters;
+ int ret;
+
+ uint64_t alloc_cluster_offset;
+
+ trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
+ *bytes);
+ assert(*bytes > 0);
+
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+ nb_clusters =
+ size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
+
+ l2_index = offset_to_l2_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ entry = be64_to_cpu(l2_table[l2_index]);
+
+ /* For the moment, overwrite compressed clusters one by one */
+ if (entry & QCOW_OFLAG_COMPRESSED) {
+ nb_clusters = 1;
+ } else {
+ nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+ }
+
+ /* This function is only called when there were no non-COW clusters, so if
+ * we can't find any unallocated or COW clusters either, something is
+ * wrong with our code. */
+ assert(nb_clusters > 0);
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Allocate, if necessary at a given offset in the image file */
+ alloc_cluster_offset = start_of_cluster(s, *host_offset);
+ ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+ &nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Can't extend contiguous allocation */
+ if (nb_clusters == 0) {
+ *bytes = 0;
+ return 0;
+ }
+
+ /*
+ * Save info needed for the metadata update.
+ *
+ * requested_sectors: Number of sectors from the start of the first
+ * newly allocated cluster to the end of the write request (which may
+ * have been shortened above).
+ *
+ * avail_sectors: Number of sectors from the start of the first newly
+ * allocated cluster to the end of the last newly allocated cluster.
+ *
+ * nb_sectors: The number of sectors from the start of the first
+ * newly allocated cluster to the end of the area that the write
+ * request actually writes to (excluding COW at the end)
+ */
+ int requested_sectors =
+ (*bytes + offset_into_cluster(s, guest_offset))
+ >> BDRV_SECTOR_BITS;
+ int avail_sectors = nb_clusters
+ << (s->cluster_bits - BDRV_SECTOR_BITS);
+ int alloc_n_start = offset_into_cluster(s, guest_offset)
+ >> BDRV_SECTOR_BITS;
+ int nb_sectors = MIN(requested_sectors, avail_sectors);
+ QCowL2Meta *old_m = *m;
+
+ *m = g_malloc0(sizeof(**m));
+
+ **m = (QCowL2Meta) {
+ .next = old_m,
+
+ .alloc_offset = alloc_cluster_offset,
+ .offset = start_of_cluster(s, guest_offset),
+ .nb_clusters = nb_clusters,
+ .nb_available = nb_sectors,
+
+ .cow_start = {
+ .offset = 0,
+ .nb_sectors = alloc_n_start,
+ },
+ .cow_end = {
+ .offset = nb_sectors * BDRV_SECTOR_SIZE,
+ .nb_sectors = avail_sectors - nb_sectors,
+ },
+ };
+ qemu_co_queue_init(&(*m)->dependent_requests);
+ QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
+
+ *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
+ *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
+ - offset_into_cluster(s, guest_offset));
+ assert(*bytes != 0);
+
+ return 1;
+
+fail:
+ if (*m && (*m)->nb_clusters > 0) {
+ QLIST_REMOVE(*m, next_in_flight);
+ }
+ return ret;
+}
+
+/*
+ * alloc_cluster_offset
+ *
+ * For a given offset on the virtual disk, find the cluster offset in qcow2
+ * file. If the offset is not found, allocate a new cluster.
+ *
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
+ * other fields in m are meaningless.
+ *
+ * If the cluster is newly allocated, m->nb_clusters is set to the number of
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
+ *
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
+ *
+ * Return 0 on success and -errno in error cases
+ */
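+/*
+ * Expected call sequence in the write path (a rough sketch, not enforced
+ * here): call qcow2_alloc_cluster_offset(), write the guest data to
+ * *host_offset, then call qcow2_alloc_cluster_link_l2() for every QCowL2Meta
+ * in the returned chain and restart its dependent_requests queue.
+ */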
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t start, remaining;
+ uint64_t cluster_offset;
+ uint64_t cur_bytes;
+ int ret;
+
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
+ n_start, n_end);
+
+ assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
+ offset = start_of_cluster(s, offset);
+
+again:
+ start = offset + (n_start << BDRV_SECTOR_BITS);
+ remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
+ cluster_offset = 0;
+ *host_offset = 0;
+ cur_bytes = 0;
+ *m = NULL;
+
+ while (true) {
+
+ if (!*host_offset) {
+ *host_offset = start_of_cluster(s, cluster_offset);
+ }
+
+ assert(remaining >= cur_bytes);
+
+ start += cur_bytes;
+ remaining -= cur_bytes;
+ cluster_offset += cur_bytes;
+
+ if (remaining == 0) {
+ break;
+ }
+
+ cur_bytes = remaining;
+
+ /*
+ * Now start gathering as many contiguous clusters as possible:
+ *
+ * 1. Check for overlaps with in-flight allocations
+ *
+ * a) Overlap not in the first cluster -> shorten this request and
+ * let the caller handle the rest in its next loop iteration.
+ *
+ * b) Real overlaps of two requests. Yield and restart the search
+ * for contiguous clusters (the situation could have changed
+ * while we were sleeping)
+ *
+ * c) TODO: Request starts in the same cluster as the in-flight
+ * allocation ends. Shorten the COW of the in-flight allocation,
+ * set cluster_offset to write to the same cluster and set up
+ * the right synchronisation between the in-flight request and
+ * the new one.
+ */
+ ret = handle_dependencies(bs, start, &cur_bytes, m);
+ if (ret == -EAGAIN) {
+ /* Currently handle_dependencies() doesn't yield if we already had
+ * an allocation. If it did, we would have to clean up the L2Meta
+ * structs before starting over. */
+ assert(*m == NULL);
+ goto again;
+ } else if (ret < 0) {
+ return ret;
+ } else if (cur_bytes == 0) {
+ break;
+ } else {
+ /* handle_dependencies() may have decreased cur_bytes (shortened
+ * the allocations below) so that the next dependency is processed
+ * correctly during the next loop iteration. */
+ }
+
+ /*
+ * 2. Count contiguous COPIED clusters.
+ */
+ ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else if (cur_bytes == 0) {
+ break;
+ }
+
+ /*
+ * 3. If the request still hasn't completed, allocate new clusters,
+ * considering any cluster_offset of steps 1c or 2.
+ */
+ ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else {
+ assert(cur_bytes == 0);
+ break;
+ }
+ }
+
+ *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
+ assert(*num > 0);
+ assert(*host_offset != 0);
+
+ return 0;
+}
+
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+ const uint8_t *buf, int buf_size)
+{
+ z_stream strm1, *strm = &strm1;
+ int ret, out_len;
+
+ memset(strm, 0, sizeof(*strm));
+
+ strm->next_in = (uint8_t *)buf;
+ strm->avail_in = buf_size;
+ strm->next_out = out_buf;
+ strm->avail_out = out_buf_size;
+
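+ /* Negative windowBits selects a raw deflate stream without a zlib header,
+ * which is the format the qcow2 compression code writes. */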
+ ret = inflateInit2(strm, -12);
+ if (ret != Z_OK)
+ return -1;
+ ret = inflate(strm, Z_FINISH);
+ out_len = strm->next_out - out_buf;
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
+ out_len != out_buf_size) {
+ inflateEnd(strm);
+ return -1;
+ }
+ inflateEnd(strm);
+ return 0;
+}
+
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, csize, nb_csectors, sector_offset;
+ uint64_t coffset;
+
+ coffset = cluster_offset & s->cluster_offset_mask;
+ if (s->cluster_cache_offset != coffset) {
+ nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
+ sector_offset = coffset & 511;
+ csize = nb_csectors * 512 - sector_offset;
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+ ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+ if (ret < 0) {
+ return ret;
+ }
+ if (decompress_buffer(s->cluster_cache, s->cluster_size,
+ s->cluster_data + sector_offset, csize) < 0) {
+ return -EIO;
+ }
+ s->cluster_cache_offset = coffset;
+ }
+ return 0;
+}
+
+/*
+ * This discards as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of discarded
+ * clusters.
+ */
+static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
+ unsigned int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table;
+ int l2_index;
+ int ret;
+ int i;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Limit nb_clusters to one L2 table */
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t old_offset;
+
+ old_offset = be64_to_cpu(l2_table[l2_index + i]);
+ if ((old_offset & L2E_OFFSET_MASK) == 0) {
+ continue;
+ }
+
+ /* First remove L2 entries */
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ l2_table[l2_index + i] = cpu_to_be64(0);
+
+ /* Then decrease the refcount */
+ qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return nb_clusters;
+}
+
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+ int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t end_offset;
+ unsigned int nb_clusters;
+ int ret;
+
+ end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
+
+ /* Round start up and end down */
+ offset = align_offset(offset, s->cluster_size);
+ end_offset &= ~(s->cluster_size - 1);
+
+ if (offset > end_offset) {
+ return 0;
+ }
+
+ nb_clusters = size_to_clusters(s, end_offset - offset);
+
+ s->cache_discards = true;
+
+ /* Each L2 table is handled by its own loop iteration */
+ while (nb_clusters > 0) {
+ ret = discard_single_l2(bs, offset, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ nb_clusters -= ret;
+ offset += (ret * s->cluster_size);
+ }
+
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
+}
+
+/*
+ * This zeroes as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of zeroed
+ * clusters.
+ */
+static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
+ unsigned int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table;
+ int l2_index;
+ int ret;
+ int i;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Limit nb_clusters to one L2 table */
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t old_offset;
+
+ old_offset = be64_to_cpu(l2_table[l2_index + i]);
+
+ /* Update L2 entries */
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ if (old_offset & QCOW_OFLAG_COMPRESSED) {
+ l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+ qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+ } else {
+ l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
+ }
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return nb_clusters;
+}
+
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int nb_clusters;
+ int ret;
+
+ /* The zero flag is only supported by version 3 and newer */
+ if (s->qcow_version < 3) {
+ return -ENOTSUP;
+ }
+
+ /* Each L2 table is handled by its own loop iteration */
+ nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+
+ s->cache_discards = true;
+
+ while (nb_clusters > 0) {
+ ret = zero_single_l2(bs, offset, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ nb_clusters -= ret;
+ offset += (ret * s->cluster_size);
+ }
+
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
+}
diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c
new file mode 100644
index 000000000..1244693f3
--- /dev/null
+++ b/contrib/qemu/block/qcow2-refcount.c
@@ -0,0 +1,1374 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length,
+ int addend, enum qcow2_discard_type type);
+
+
+/*********************************************************/
+/* refcount handling */
+
+int qcow2_refcount_init(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, refcount_table_size2, i;
+
+ refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
+ s->refcount_table = g_malloc(refcount_table_size2);
+ if (s->refcount_table_size > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
+ ret = bdrv_pread(bs->file, s->refcount_table_offset,
+ s->refcount_table, refcount_table_size2);
+ if (ret != refcount_table_size2)
+ goto fail;
+ for(i = 0; i < s->refcount_table_size; i++)
+ be64_to_cpus(&s->refcount_table[i]);
+ }
+ return 0;
+ fail:
+ return -ENOMEM;
+}
+
+void qcow2_refcount_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ g_free(s->refcount_table);
+}
+
+
+static int load_refcount_block(BlockDriverState *bs,
+ int64_t refcount_block_offset,
+ void **refcount_block)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+ refcount_block);
+
+ return ret;
+}
+
+/*
+ * Returns the refcount of the cluster given by its index. Any non-negative
+ * return value is the refcount of the cluster, negative values are -errno
+ * and indicate an error.
+ */
+static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
+{
+ BDRVQcowState *s = bs->opaque;
+ int refcount_table_index, block_index;
+ int64_t refcount_block_offset;
+ int ret;
+ uint16_t *refcount_block;
+ uint16_t refcount;
+
+ refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+ if (refcount_table_index >= s->refcount_table_size)
+ return 0;
+ refcount_block_offset = s->refcount_table[refcount_table_index];
+ if (!refcount_block_offset)
+ return 0;
+
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ block_index = cluster_index &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+ refcount = be16_to_cpu(refcount_block[block_index]);
+
+ ret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return refcount;
+}
+
+/*
+ * Rounds the refcount table size up to avoid growing the table for each single
+ * refcount block that is allocated.
+ */
+static unsigned int next_refcount_table_size(BDRVQcowState *s,
+ unsigned int min_size)
+{
+ unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
+ unsigned int refcount_table_clusters =
+ MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
+
+ while (min_clusters > refcount_table_clusters) {
+ refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
+ }
+
+ return refcount_table_clusters << (s->cluster_bits - 3);
+}
+
+
+/* Checks if two offsets are described by the same refcount block */
+static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+ uint64_t offset_b)
+{
+ uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
+
+ return (block_a == block_b);
+}
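+/*
+ * Example (assuming the default 64 KiB clusters and 16 bit refcount
+ * entries): one refcount block holds 32768 entries and therefore covers
+ * 2 GiB of host file offsets, so two offsets are in the same block iff
+ * offset_a >> 31 == offset_b >> 31.
+ */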
+
+/*
+ * Loads a refcount block. If it doesn't exist yet, it is allocated first
+ * (including growing the refcount table if needed).
+ *
+ * Returns 0 on success or -errno in error case
+ */
+static int alloc_refcount_block(BlockDriverState *bs,
+ int64_t cluster_index, uint16_t **refcount_block)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int refcount_table_index;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
+
+ /* Find the refcount block for the given cluster */
+ refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+
+ if (refcount_table_index < s->refcount_table_size) {
+
+ uint64_t refcount_block_offset =
+ s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
+
+ /* If it's already there, we're done */
+ if (refcount_block_offset) {
+ return load_refcount_block(bs, refcount_block_offset,
+ (void**) refcount_block);
+ }
+ }
+
+ /*
+ * If we come here, we need to allocate something: at least a cluster for
+ * the new refcount block, and possibly also a new refcount table if the
+ * old refcount table is too small.
+ *
+ * Note that allocating clusters here needs some special care:
+ *
+ * - We can't use the normal qcow2_alloc_clusters(), it would try to
+ * increase the refcount and very likely we would end up with an endless
+ * recursion. Instead we must place the refcount blocks in a way that
+ * they can describe them themselves.
+ *
+ * - We need to consider that at this point we are inside update_refcounts
+ * and doing the initial refcount increase. This means that some clusters
+ * have already been allocated by the caller, but their refcount isn't
+ * accurate yet. free_cluster_index tells us where this allocation ends
+ * as long as we don't overwrite it by freeing clusters.
+ *
+ * - alloc_clusters_noref and qcow2_free_clusters may load a different
+ * refcount block into the cache
+ */
+
+ *refcount_block = NULL;
+
+ /* We write to the refcount table, so we might depend on L2 tables */
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Allocate the refcount block itself and mark it as used */
+ int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
+ if (new_block < 0) {
+ return new_block;
+ }
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
+ " at %" PRIx64 "\n",
+ refcount_table_index, cluster_index << s->cluster_bits, new_block);
+#endif
+
+ if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
+ /* Zero the new refcount block before updating it */
+ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+ (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ memset(*refcount_block, 0, s->cluster_size);
+
+ /* The block describes itself, need to update the cache */
+ int block_index = (new_block >> s->cluster_bits) &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+ (*refcount_block)[block_index] = cpu_to_be16(1);
+ } else {
+ /* Described somewhere else. This can recurse at most twice before we
+ * arrive at a block that describes itself. */
+ ret = update_refcount(bs, new_block, s->cluster_size, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* Initialize the new refcount block only after updating its refcount,
+ * update_refcount uses the refcount cache itself */
+ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+ (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ memset(*refcount_block, 0, s->cluster_size);
+ }
+
+ /* Now the new refcount block needs to be written to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* If the refcount table is big enough, just hook the block up there */
+ if (refcount_table_index < s->refcount_table_size) {
+ uint64_t data64 = cpu_to_be64(new_block);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
+ ret = bdrv_pwrite_sync(bs->file,
+ s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
+ &data64, sizeof(data64));
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ s->refcount_table[refcount_table_index] = new_block;
+ return 0;
+ }
+
+ ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /*
+ * If we come here, we need to grow the refcount table. Again, a new
+ * refcount table needs some space and we can't simply allocate to avoid
+ * endless recursion.
+ *
+ * Therefore let's grab new refcount blocks at the end of the image, which
+ * will describe themselves and the new refcount table. This way we can
+ * reference them only in the new table and do the switch to the new
+ * refcount table at once without producing an inconsistent state in
+ * between.
+ */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
+
+ /* Calculate the number of refcount blocks needed so far */
+ uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t blocks_used = (s->free_cluster_index +
+ refcount_block_clusters - 1) / refcount_block_clusters;
+
+ /* And now we need at least one block more for the new metadata */
+ uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
+ uint64_t last_table_size;
+ uint64_t blocks_clusters;
+ do {
+ uint64_t table_clusters =
+ size_to_clusters(s, table_size * sizeof(uint64_t));
+ blocks_clusters = 1 +
+ ((table_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters);
+ uint64_t meta_clusters = table_clusters + blocks_clusters;
+
+ last_table_size = table_size;
+ table_size = next_refcount_table_size(s, blocks_used +
+ ((meta_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters));
+
+ } while (last_table_size != table_size);
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
+ s->refcount_table_size, table_size);
+#endif
+
+ /* Create the new refcount table and blocks */
+ uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
+ s->cluster_size;
+ uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
+ uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
+ uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
+
+ assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
+
+ /* Fill the new refcount table */
+ memcpy(new_table, s->refcount_table,
+ s->refcount_table_size * sizeof(uint64_t));
+ new_table[refcount_table_index] = new_block;
+
+ int i;
+ for (i = 0; i < blocks_clusters; i++) {
+ new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
+ }
+
+ /* Fill the refcount blocks */
+ uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
+ int block = 0;
+ for (i = 0; i < table_clusters + blocks_clusters; i++) {
+ new_blocks[block++] = cpu_to_be16(1);
+ }
+
+ /* Write refcount blocks to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
+ ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+ blocks_clusters * s->cluster_size);
+ g_free(new_blocks);
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* Write refcount table to disk */
+ for(i = 0; i < table_size; i++) {
+ cpu_to_be64s(&new_table[i]);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+ table_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ for(i = 0; i < table_size; i++) {
+ be64_to_cpus(&new_table[i]);
+ }
+
+ /* Hook up the new refcount table in the qcow2 header */
+ uint8_t data[12];
+ cpu_to_be64w((uint64_t*)data, table_offset);
+ cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
+ data, sizeof(data));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* And switch it in memory */
+ uint64_t old_table_offset = s->refcount_table_offset;
+ uint64_t old_table_size = s->refcount_table_size;
+
+ g_free(s->refcount_table);
+ s->refcount_table = new_table;
+ s->refcount_table_size = table_size;
+ s->refcount_table_offset = table_offset;
+
+ /* Free old table. Remember, we must not change free_cluster_index */
+ uint64_t old_free_cluster_index = s->free_cluster_index;
+ qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ s->free_cluster_index = old_free_cluster_index;
+
+ ret = load_refcount_block(bs, new_block, (void**) refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+
+fail_table:
+ g_free(new_table);
+fail_block:
+ if (*refcount_block != NULL) {
+ qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ }
+ return ret;
+}
+
+void qcow2_process_discards(BlockDriverState *bs, int ret)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2DiscardRegion *d, *next;
+
+ QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
+ QTAILQ_REMOVE(&s->discards, d, next);
+
+ /* Discard is optional, ignore the return value */
+ if (ret >= 0) {
+ bdrv_discard(bs->file,
+ d->offset >> BDRV_SECTOR_BITS,
+ d->bytes >> BDRV_SECTOR_BITS);
+ }
+
+ g_free(d);
+ }
+}
+
+static void update_refcount_discard(BlockDriverState *bs,
+ uint64_t offset, uint64_t length)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2DiscardRegion *d, *p, *next;
+
+ QTAILQ_FOREACH(d, &s->discards, next) {
+ uint64_t new_start = MIN(offset, d->offset);
+ uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
+
+ if (new_end - new_start <= length + d->bytes) {
+ /* There can't be any overlap, areas ending up here have no
+ * references any more and therefore shouldn't get freed another
+ * time. */
+ assert(d->bytes + length == new_end - new_start);
+ d->offset = new_start;
+ d->bytes = new_end - new_start;
+ goto found;
+ }
+ }
+
+ d = g_malloc(sizeof(*d));
+ *d = (Qcow2DiscardRegion) {
+ .bs = bs,
+ .offset = offset,
+ .bytes = length,
+ };
+ QTAILQ_INSERT_TAIL(&s->discards, d, next);
+
+found:
+ /* Merge discard requests if they are adjacent now */
+ QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
+ if (p == d
+ || p->offset > d->offset + d->bytes
+ || d->offset > p->offset + p->bytes)
+ {
+ continue;
+ }
+
+ /* Still no overlap possible */
+ assert(p->offset == d->offset + d->bytes
+ || d->offset == p->offset + p->bytes);
+
+ QTAILQ_REMOVE(&s->discards, p, next);
+ d->offset = MIN(d->offset, p->offset);
+ d->bytes += p->bytes;
+ }
+}
+
+/* XXX: cache several refcount block clusters ? */
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t start, last, cluster_offset;
+ uint16_t *refcount_block = NULL;
+ int64_t old_table_index = -1;
+ int ret;
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
+ offset, length, addend);
+#endif
+ if (length < 0) {
+ return -EINVAL;
+ } else if (length == 0) {
+ return 0;
+ }
+
+ if (addend < 0) {
+ qcow2_cache_set_dependency(bs, s->refcount_block_cache,
+ s->l2_table_cache);
+ }
+
+ start = offset & ~(s->cluster_size - 1);
+ last = (offset + length - 1) & ~(s->cluster_size - 1);
+ for(cluster_offset = start; cluster_offset <= last;
+ cluster_offset += s->cluster_size)
+ {
+ int block_index, refcount;
+ int64_t cluster_index = cluster_offset >> s->cluster_bits;
+ int64_t table_index =
+ cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+
+ /* Load the refcount block and allocate it if needed */
+ if (table_index != old_table_index) {
+ if (refcount_block) {
+ ret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ old_table_index = table_index;
+
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
+
+ /* we can update the count and save it */
+ block_index = cluster_index &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+
+ refcount = be16_to_cpu(refcount_block[block_index]);
+ refcount += addend;
+ if (refcount < 0 || refcount > 0xffff) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (refcount == 0 && cluster_index < s->free_cluster_index) {
+ s->free_cluster_index = cluster_index;
+ }
+ refcount_block[block_index] = cpu_to_be16(refcount);
+
+ if (refcount == 0 && s->discard_passthrough[type]) {
+ update_refcount_discard(bs, cluster_offset, s->cluster_size);
+ }
+ }
+
+ ret = 0;
+fail:
+ if (!s->cache_discards) {
+ qcow2_process_discards(bs, ret);
+ }
+
+ /* Write last changed block to disk */
+ if (refcount_block) {
+ int wret;
+ wret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (wret < 0) {
+ return ret < 0 ? ret : wret;
+ }
+ }
+
+ /*
+ * Try to undo any updates if an error is returned (this may succeed in
+ * some cases, such as ENOSPC when allocating a new refcount block)
+ */
+ if (ret < 0) {
+ int dummy;
+ dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
+ QCOW2_DISCARD_NEVER);
+ (void)dummy;
+ }
+
+ return ret;
+}
+
+/*
+ * Increases or decreases the refcount of a given cluster by one.
+ * addend must be 1 or -1.
+ *
+ * If the return value is non-negative, it is the new refcount of the cluster.
+ * If it is negative, it is -errno and indicates an error.
+ */
+static int update_cluster_refcount(BlockDriverState *bs,
+ int64_t cluster_index,
+ int addend,
+ enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
+ type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return get_refcount(bs, cluster_index);
+}
+
+
+
+/*********************************************************/
+/* cluster allocation functions */
+
+
+
+/* return < 0 if error */
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, nb_clusters, refcount;
+
+ nb_clusters = size_to_clusters(s, size);
+retry:
+ for(i = 0; i < nb_clusters; i++) {
+ int64_t next_cluster_index = s->free_cluster_index++;
+ refcount = get_refcount(bs, next_cluster_index);
+
+ if (refcount < 0) {
+ return refcount;
+ } else if (refcount != 0) {
+ goto retry;
+ }
+ }
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
+ size,
+ (s->free_cluster_index - nb_clusters) << s->cluster_bits);
+#endif
+ return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
+}
+
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
+{
+ int64_t offset;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
+ offset = alloc_clusters_noref(bs, size);
+ if (offset < 0) {
+ return offset;
+ }
+
+ ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return offset;
+}
+
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t cluster_index;
+ uint64_t old_free_cluster_index;
+ int i, refcount, ret;
+
+ /* Check how many clusters there are free */
+ cluster_index = offset >> s->cluster_bits;
+ for(i = 0; i < nb_clusters; i++) {
+ refcount = get_refcount(bs, cluster_index++);
+
+ if (refcount < 0) {
+ return refcount;
+ } else if (refcount != 0) {
+ break;
+ }
+ }
+
+ /* And then allocate them */
+ old_free_cluster_index = s->free_cluster_index;
+ s->free_cluster_index = cluster_index + i;
+
+ ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->free_cluster_index = old_free_cluster_index;
+
+ return i;
+}
+
+/* only used to allocate compressed sectors. We try to allocate
+ contiguous sectors. size must be <= cluster_size */
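+/*
+ * Descriptive note: s->free_byte_offset remembers the first unused byte in
+ * the most recently used cluster. New compressed data is appended there if
+ * it fits; otherwise a fresh cluster is allocated. If that cluster happens
+ * to directly follow the old one, the allocation still starts at the old
+ * free_byte_offset and spills over; otherwise it restarts at the beginning
+ * of the new cluster.
+ */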
+int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t offset, cluster_offset;
+ int free_in_cluster;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
+ assert(size > 0 && size <= s->cluster_size);
+ if (s->free_byte_offset == 0) {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ return offset;
+ }
+ s->free_byte_offset = offset;
+ }
+ redo:
+ free_in_cluster = s->cluster_size -
+ (s->free_byte_offset & (s->cluster_size - 1));
+ if (size <= free_in_cluster) {
+ /* enough space in current cluster */
+ offset = s->free_byte_offset;
+ s->free_byte_offset += size;
+ free_in_cluster -= size;
+ if (free_in_cluster == 0)
+ s->free_byte_offset = 0;
+ if ((offset & (s->cluster_size - 1)) != 0)
+ update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
+ } else {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ return offset;
+ }
+ cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
+ if ((cluster_offset + s->cluster_size) == offset) {
+ /* we are lucky: contiguous data */
+ offset = s->free_byte_offset;
+ update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
+ s->free_byte_offset += size;
+ } else {
+ s->free_byte_offset = offset;
+ goto redo;
+ }
+ }
+
+ /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
+ * or explicitly by update_cluster_refcount(). Refcount blocks must be
+ * flushed before the caller's L2 table updates.
+ */
+ qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+ return offset;
+}
+
+void qcow2_free_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t size,
+ enum qcow2_discard_type type)
+{
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
+ ret = update_refcount(bs, offset, size, -1, type);
+ if (ret < 0) {
+ fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
+ /* TODO Remember the clusters to free them later and avoid leaking */
+ }
+}
+
+/*
+ * Free a cluster using its L2 entry (handles clusters of all types, e.g.
+ * normal cluster, compressed cluster, etc.)
+ */
+void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
+ int nb_clusters, enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ {
+ int nb_csectors;
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ qcow2_free_clusters(bs,
+ (l2_entry & s->cluster_offset_mask) & ~511,
+ nb_csectors * 512, type);
+ }
+ break;
+ case QCOW2_CLUSTER_NORMAL:
+ qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+ nb_clusters << s->cluster_bits, type);
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ case QCOW2_CLUSTER_ZERO:
+ break;
+ default:
+ abort();
+ }
+}
+
+
+
+/*********************************************************/
+/* snapshots and image creation */
+
+
+
+/* update the refcounts of snapshots and the copied flag */
+int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+ int64_t l1_table_offset, int l1_size, int addend)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+ int64_t old_offset, old_l2_offset;
+ int i, j, l1_modified = 0, nb_csectors, refcount;
+ int ret;
+
+ l2_table = NULL;
+ l1_table = NULL;
+ l1_size2 = l1_size * sizeof(uint64_t);
+
+ s->cache_discards = true;
+
+ /* WARNING: qcow2_snapshot_goto relies on this function not using the
+ * l1_table_offset when it is the current s->l1_table_offset! Be careful
+ * when changing this! */
+ if (l1_table_offset != s->l1_table_offset) {
+ l1_table = g_malloc0(align_offset(l1_size2, 512));
+ l1_allocated = 1;
+
+ ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for(i = 0;i < l1_size; i++)
+ be64_to_cpus(&l1_table[i]);
+ } else {
+ assert(l1_size == s->l1_size);
+ l1_table = s->l1_table;
+ l1_allocated = 0;
+ }
+
+ for(i = 0; i < l1_size; i++) {
+ l2_offset = l1_table[i];
+ if (l2_offset) {
+ old_l2_offset = l2_offset;
+ l2_offset &= L1E_OFFSET_MASK;
+
+ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+ (void**) &l2_table);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for(j = 0; j < s->l2_size; j++) {
+ offset = be64_to_cpu(l2_table[j]);
+ if (offset != 0) {
+ old_offset = offset;
+ offset &= ~QCOW_OFLAG_COPIED;
+ if (offset & QCOW_OFLAG_COMPRESSED) {
+ nb_csectors = ((offset >> s->csize_shift) &
+ s->csize_mask) + 1;
+ if (addend != 0) {
+ int ret;
+ ret = update_refcount(bs,
+ (offset & s->cluster_offset_mask) & ~511,
+ nb_csectors * 512, addend,
+ QCOW2_DISCARD_SNAPSHOT);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ /* compressed clusters are never modified */
+ refcount = 2;
+ } else {
+ uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
+ if (addend != 0) {
+ refcount = update_cluster_refcount(bs, cluster_index, addend,
+ QCOW2_DISCARD_SNAPSHOT);
+ } else {
+ refcount = get_refcount(bs, cluster_index);
+ }
+
+ if (refcount < 0) {
+ ret = refcount;
+ goto fail;
+ }
+ }
+
+ if (refcount == 1) {
+ offset |= QCOW_OFLAG_COPIED;
+ }
+ if (offset != old_offset) {
+ if (addend > 0) {
+ qcow2_cache_set_dependency(bs, s->l2_table_cache,
+ s->refcount_block_cache);
+ }
+ l2_table[j] = cpu_to_be64(offset);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ }
+ }
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ goto fail;
+ }
+
+
+ if (addend != 0) {
+ refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
+ QCOW2_DISCARD_SNAPSHOT);
+ } else {
+ refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+ }
+ if (refcount < 0) {
+ ret = refcount;
+ goto fail;
+ } else if (refcount == 1) {
+ l2_offset |= QCOW_OFLAG_COPIED;
+ }
+ if (l2_offset != old_l2_offset) {
+ l1_table[i] = l2_offset;
+ l1_modified = 1;
+ }
+ }
+ }
+
+ ret = bdrv_flush(bs);
+fail:
+ if (l2_table) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ }
+
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ /* Update L1 only if it isn't deleted anyway (addend = -1) */
+ if (ret == 0 && addend >= 0 && l1_modified) {
+ for (i = 0; i < l1_size; i++) {
+ cpu_to_be64s(&l1_table[i]);
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+
+ for (i = 0; i < l1_size; i++) {
+ be64_to_cpus(&l1_table[i]);
+ }
+ }
+ if (l1_allocated)
+ g_free(l1_table);
+ return ret;
+}
+
+
+
+
+/*********************************************************/
+/* refcount checking functions */
+
+
+
+/*
+ * Increases the refcount for a range of clusters in a given refcount table.
+ * This is used to construct a temporary refcount table out of L1 and L2 tables
+ * which can be compared to the refcount table saved in the image.
+ *
+ * Modifies the number of errors in res.
+ */
+static void inc_refcounts(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ uint16_t *refcount_table,
+ int refcount_table_size,
+ int64_t offset, int64_t size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t start, last, cluster_offset;
+ int k;
+
+ if (size <= 0)
+ return;
+
+ start = offset & ~(s->cluster_size - 1);
+ last = (offset + size - 1) & ~(s->cluster_size - 1);
+ for(cluster_offset = start; cluster_offset <= last;
+ cluster_offset += s->cluster_size) {
+ k = cluster_offset >> s->cluster_bits;
+ if (k < 0) {
+ fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
+ cluster_offset);
+ res->corruptions++;
+ } else if (k >= refcount_table_size) {
+ fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
+ "the end of the image file, can't properly check refcounts.\n",
+ cluster_offset);
+ res->check_errors++;
+ } else {
+ if (++refcount_table[k] == 0) {
+ fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
+ "\n", cluster_offset);
+ res->corruptions++;
+ }
+ }
+ }
+}
+
+/* Flags for check_refcounts_l1() and check_refcounts_l2() */
+enum {
+ CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */
+ CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
+};
+
+/*
+ * Increases the refcount in the given refcount table for all clusters
+ * referenced in the L2 table. While doing so, performs some checks on L2
+ * entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
+ uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
+ int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table, l2_entry;
+ uint64_t next_contiguous_offset = 0;
+ int i, l2_size, nb_csectors, refcount;
+
+ /* Read L2 table from disk */
+ l2_size = s->l2_size * sizeof(uint64_t);
+ l2_table = g_malloc(l2_size);
+
+ if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+ goto fail;
+
+ /* Do the actual checks */
+ for(i = 0; i < s->l2_size; i++) {
+ l2_entry = be64_to_cpu(l2_table[i]);
+
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+ "copied flag must never be set for compressed "
+ "clusters\n", l2_entry >> s->cluster_bits);
+ l2_entry &= ~QCOW_OFLAG_COPIED;
+ res->corruptions++;
+ }
+
+ /* Mark cluster as used */
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ l2_entry &= s->cluster_offset_mask;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_entry & ~511, nb_csectors * 512);
+
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ res->bfi.compressed_clusters++;
+
+ /* Compressed clusters are fragmented by nature. Since they
+ * take up sub-sector space but we only have sector granularity
+ * I/O, we need to re-read the same sectors even for adjacent
+ * compressed clusters.
+ */
+ res->bfi.fragmented_clusters++;
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+ break;
+ }
+ /* fall through */
+
+ case QCOW2_CLUSTER_NORMAL:
+ {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ uint64_t offset = l2_entry & L2E_OFFSET_MASK;
+
+ if (flags & CHECK_OFLAG_COPIED) {
+ refcount = get_refcount(bs, offset >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for offset %"
+ PRIx64 ": %s\n", l2_entry, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+ PRIx64 " refcount=%d\n", l2_entry, refcount);
+ res->corruptions++;
+ }
+ }
+
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ if (next_contiguous_offset &&
+ offset != next_contiguous_offset) {
+ res->bfi.fragmented_clusters++;
+ }
+ next_contiguous_offset = offset + s->cluster_size;
+ }
+
+ /* Mark cluster as used */
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ offset, s->cluster_size);
+
+ /* Correct offsets are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+ "properly aligned; L2 entry corrupted.\n", offset);
+ res->corruptions++;
+ }
+ break;
+ }
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ break;
+
+ default:
+ abort();
+ }
+ }
+
+ g_free(l2_table);
+ return 0;
+
+fail:
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
+ g_free(l2_table);
+ return -EIO;
+}
+
+/*
+ * Increases the refcount for the L1 table, its L2 tables and all referenced
+ * clusters in the given refcount table. While doing so, performs some checks
+ * on L1 and L2 entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l1(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ uint16_t *refcount_table,
+ int refcount_table_size,
+ int64_t l1_table_offset, int l1_size,
+ int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table, l2_offset, l1_size2;
+ int i, refcount, ret;
+
+ l1_size2 = l1_size * sizeof(uint64_t);
+
+ /* Mark L1 table as used */
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l1_table_offset, l1_size2);
+
+ /* Read L1 table entries from disk */
+ if (l1_size2 == 0) {
+ l1_table = NULL;
+ } else {
+ l1_table = g_malloc(l1_size2);
+ if (bdrv_pread(bs->file, l1_table_offset,
+ l1_table, l1_size2) != l1_size2)
+ goto fail;
+ for(i = 0;i < l1_size; i++)
+ be64_to_cpus(&l1_table[i]);
+ }
+
+ /* Do the actual checks */
+ for(i = 0; i < l1_size; i++) {
+ l2_offset = l1_table[i];
+ if (l2_offset) {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ if (flags & CHECK_OFLAG_COPIED) {
+ refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+ >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for l2_offset %"
+ PRIx64 ": %s\n", l2_offset, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
+ " refcount=%d\n", l2_offset, refcount);
+ res->corruptions++;
+ }
+ }
+
+ /* Mark L2 table as used */
+ l2_offset &= L1E_OFFSET_MASK;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_offset, s->cluster_size);
+
+ /* L2 tables are cluster aligned */
+ if (l2_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+ "cluster aligned; L1 entry corrupted\n", l2_offset);
+ res->corruptions++;
+ }
+
+ /* Process and check L2 entries */
+ ret = check_refcounts_l2(bs, res, refcount_table,
+ refcount_table_size, l2_offset, flags);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ }
+ g_free(l1_table);
+ return 0;
+
+fail:
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+ res->check_errors++;
+ g_free(l1_table);
+ return -EIO;
+}
+
+/*
+ * Checks an image for refcount consistency.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occurred.
+ */
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t size, i, highest_cluster;
+ int nb_clusters, refcount1, refcount2;
+ QCowSnapshot *sn;
+ uint16_t *refcount_table;
+ int ret;
+
+ size = bdrv_getlength(bs->file);
+ nb_clusters = size_to_clusters(s, size);
+ refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
+
+ res->bfi.total_clusters =
+ size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
+
+ /* header */
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ 0, s->cluster_size);
+
+ /* current L1 table */
+ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+ s->l1_table_offset, s->l1_size,
+ CHECK_OFLAG_COPIED | CHECK_FRAG_INFO);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* snapshots */
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+ sn->l1_table_offset, sn->l1_size, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ s->snapshots_offset, s->snapshots_size);
+
+ /* refcount data */
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t));
+
+ for(i = 0; i < s->refcount_table_size; i++) {
+ uint64_t offset, cluster;
+ offset = s->refcount_table[i];
+ cluster = offset >> s->cluster_bits;
+
+ /* Refcount blocks are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
+ "cluster aligned; refcount table entry corrupted\n", i);
+ res->corruptions++;
+ continue;
+ }
+
+ if (cluster >= nb_clusters) {
+ fprintf(stderr, "ERROR refcount block %" PRId64
+ " is outside image\n", i);
+ res->corruptions++;
+ continue;
+ }
+
+ if (offset != 0) {
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ offset, s->cluster_size);
+ if (refcount_table[cluster] != 1) {
+ fprintf(stderr, "ERROR refcount block %" PRId64
+ " refcount=%d\n",
+ i, refcount_table[cluster]);
+ res->corruptions++;
+ }
+ }
+ }
+
+ /* compare ref counts */
+ for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
+ refcount1 = get_refcount(bs, i);
+ if (refcount1 < 0) {
+ fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+ i, strerror(-refcount1));
+ res->check_errors++;
+ continue;
+ }
+
+ refcount2 = refcount_table[i];
+
+ if (refcount1 > 0 || refcount2 > 0) {
+ highest_cluster = i;
+ }
+
+ if (refcount1 != refcount2) {
+
+ /* Check if we're allowed to fix the mismatch */
+ int *num_fixed = NULL;
+ if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
+ num_fixed = &res->leaks_fixed;
+ } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
+ num_fixed = &res->corruptions_fixed;
+ }
+
+ fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
+ num_fixed != NULL ? "Repairing" :
+ refcount1 < refcount2 ? "ERROR" :
+ "Leaked",
+ i, refcount1, refcount2);
+
+ if (num_fixed) {
+ ret = update_refcount(bs, i << s->cluster_bits, 1,
+ refcount2 - refcount1,
+ QCOW2_DISCARD_ALWAYS);
+ if (ret >= 0) {
+ (*num_fixed)++;
+ continue;
+ }
+ }
+
+ /* And if we couldn't, print an error */
+ if (refcount1 < refcount2) {
+ res->corruptions++;
+ } else {
+ res->leaks++;
+ }
+ }
+ }
+
+ res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
+ ret = 0;
+
+fail:
+ g_free(refcount_table);
+
+ return ret;
+}
+
diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c
new file mode 100644
index 000000000..0caac9055
--- /dev/null
+++ b/contrib/qemu/block/qcow2-snapshot.c
@@ -0,0 +1,660 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+typedef struct QEMU_PACKED QCowSnapshotHeader {
+ /* header is 8 byte aligned */
+ uint64_t l1_table_offset;
+
+ uint32_t l1_size;
+ uint16_t id_str_size;
+ uint16_t name_size;
+
+ uint32_t date_sec;
+ uint32_t date_nsec;
+
+ uint64_t vm_clock_nsec;
+
+ uint32_t vm_state_size;
+ uint32_t extra_data_size; /* for extension */
+ /* extra data follows */
+ /* id_str follows */
+ /* name follows */
+} QCowSnapshotHeader;
+
+typedef struct QEMU_PACKED QCowSnapshotExtraData {
+ uint64_t vm_state_size_large;
+ uint64_t disk_size;
+} QCowSnapshotExtraData;
+
+void qcow2_free_snapshots(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ g_free(s->snapshots[i].name);
+ g_free(s->snapshots[i].id_str);
+ }
+ g_free(s->snapshots);
+ s->snapshots = NULL;
+ s->nb_snapshots = 0;
+}
+
+int qcow2_read_snapshots(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshotHeader h;
+ QCowSnapshotExtraData extra;
+ QCowSnapshot *sn;
+ int i, id_str_size, name_size;
+ int64_t offset;
+ uint32_t extra_data_size;
+ int ret;
+
+ if (!s->nb_snapshots) {
+ s->snapshots = NULL;
+ s->snapshots_size = 0;
+ return 0;
+ }
+
+ offset = s->snapshots_offset;
+ s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ /* Read statically sized part of the snapshot header */
+ offset = align_offset(offset, 8);
+ ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ offset += sizeof(h);
+ sn = s->snapshots + i;
+ sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
+ sn->l1_size = be32_to_cpu(h.l1_size);
+ sn->vm_state_size = be32_to_cpu(h.vm_state_size);
+ sn->date_sec = be32_to_cpu(h.date_sec);
+ sn->date_nsec = be32_to_cpu(h.date_nsec);
+ sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
+ extra_data_size = be32_to_cpu(h.extra_data_size);
+
+ id_str_size = be16_to_cpu(h.id_str_size);
+ name_size = be16_to_cpu(h.name_size);
+
+ /* Read extra data */
+ ret = bdrv_pread(bs->file, offset, &extra,
+ MIN(sizeof(extra), extra_data_size));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += extra_data_size;
+
+ if (extra_data_size >= 8) {
+ sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
+ }
+
+ if (extra_data_size >= 16) {
+ sn->disk_size = be64_to_cpu(extra.disk_size);
+ } else {
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ }
+
+ /* Read snapshot ID */
+ sn->id_str = g_malloc(id_str_size + 1);
+ ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += id_str_size;
+ sn->id_str[id_str_size] = '\0';
+
+ /* Read snapshot name */
+ sn->name = g_malloc(name_size + 1);
+ ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += name_size;
+ sn->name[name_size] = '\0';
+ }
+
+ s->snapshots_size = offset - s->snapshots_offset;
+ return 0;
+
+fail:
+ qcow2_free_snapshots(bs);
+ return ret;
+}
+
+/* add at the end of the file a new list of snapshots */
+static int qcow2_write_snapshots(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ QCowSnapshotHeader h;
+ QCowSnapshotExtraData extra;
+ int i, name_size, id_str_size, snapshots_size;
+ struct {
+ uint32_t nb_snapshots;
+ uint64_t snapshots_offset;
+ } QEMU_PACKED header_data;
+ int64_t offset, snapshots_offset;
+ int ret;
+
+ /* compute the size of the snapshots */
+ offset = 0;
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ offset = align_offset(offset, 8);
+ offset += sizeof(h);
+ offset += sizeof(extra);
+ offset += strlen(sn->id_str);
+ offset += strlen(sn->name);
+ }
+ snapshots_size = offset;
+
+ /* Allocate space for the new snapshot list */
+ snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
+ offset = snapshots_offset;
+ if (offset < 0) {
+ return offset;
+ }
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Write all snapshots to the new list */
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ memset(&h, 0, sizeof(h));
+ h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
+ h.l1_size = cpu_to_be32(sn->l1_size);
+ /* If it doesn't fit in 32 bit, older implementations should treat it
+ * as a disk-only snapshot rather than truncate the VM state */
+ if (sn->vm_state_size <= 0xffffffff) {
+ h.vm_state_size = cpu_to_be32(sn->vm_state_size);
+ }
+ h.date_sec = cpu_to_be32(sn->date_sec);
+ h.date_nsec = cpu_to_be32(sn->date_nsec);
+ h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
+ h.extra_data_size = cpu_to_be32(sizeof(extra));
+
+ memset(&extra, 0, sizeof(extra));
+ extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
+ extra.disk_size = cpu_to_be64(sn->disk_size);
+
+ id_str_size = strlen(sn->id_str);
+ name_size = strlen(sn->name);
+ h.id_str_size = cpu_to_be16(id_str_size);
+ h.name_size = cpu_to_be16(name_size);
+ offset = align_offset(offset, 8);
+
+ ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += sizeof(h);
+
+ ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += sizeof(extra);
+
+ ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += id_str_size;
+
+ ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += name_size;
+ }
+
+ /*
+ * Update the header to point to the new snapshot table. This requires the
+ * new table and its refcounts to be stable on disk.
+ */
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
+ offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
+
+ header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
+ header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
+
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+ &header_data, sizeof(header_data));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* free the old snapshot table */
+ qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
+ QCOW2_DISCARD_SNAPSHOT);
+ s->snapshots_offset = snapshots_offset;
+ s->snapshots_size = snapshots_size;
+ return 0;
+
+fail:
+ return ret;
+}
+
+static void find_new_snapshot_id(BlockDriverState *bs,
+ char *id_str, int id_str_size)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ int i, id, id_max = 0;
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ id = strtoul(sn->id_str, NULL, 10);
+ if (id > id_max)
+ id_max = id;
+ }
+ snprintf(id_str, id_str_size, "%d", id_max + 1);
+}
+
+static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ if (!strcmp(s->snapshots[i].id_str, id_str))
+ return i;
+ }
+ return -1;
+}
+
+static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, ret;
+
+ ret = find_snapshot_by_id(bs, name);
+ if (ret >= 0)
+ return ret;
+ for(i = 0; i < s->nb_snapshots; i++) {
+ if (!strcmp(s->snapshots[i].name, name))
+ return i;
+ }
+ return -1;
+}
+
+/* if no id is provided, a new one is constructed */
+int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *new_snapshot_list = NULL;
+ QCowSnapshot *old_snapshot_list = NULL;
+ QCowSnapshot sn1, *sn = &sn1;
+ int i, ret;
+ uint64_t *l1_table = NULL;
+ int64_t l1_table_offset;
+
+ memset(sn, 0, sizeof(*sn));
+
+ /* Generate an ID if it wasn't passed */
+ if (sn_info->id_str[0] == '\0') {
+ find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+ }
+
+ /* Check that the ID is unique */
+ if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
+ return -EEXIST;
+ }
+
+ /* Populate sn with passed data */
+ sn->id_str = g_strdup(sn_info->id_str);
+ sn->name = g_strdup(sn_info->name);
+
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ sn->vm_state_size = sn_info->vm_state_size;
+ sn->date_sec = sn_info->date_sec;
+ sn->date_nsec = sn_info->date_nsec;
+ sn->vm_clock_nsec = sn_info->vm_clock_nsec;
+
+ /* Allocate the L1 table of the snapshot and copy the current one there. */
+ l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+ if (l1_table_offset < 0) {
+ ret = l1_table_offset;
+ goto fail;
+ }
+
+ sn->l1_table_offset = l1_table_offset;
+ sn->l1_size = s->l1_size;
+
+ l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+ for(i = 0; i < s->l1_size; i++) {
+ l1_table[i] = cpu_to_be64(s->l1_table[i]);
+ }
+
+ ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ g_free(l1_table);
+ l1_table = NULL;
+
+ /*
+ * Increase the refcounts of all clusters and make sure everything is
+ * stable on disk before updating the snapshot table to contain a pointer
+ * to the new L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Append the new snapshot to the snapshot list */
+ new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
+ if (s->snapshots) {
+ memcpy(new_snapshot_list, s->snapshots,
+ s->nb_snapshots * sizeof(QCowSnapshot));
+ old_snapshot_list = s->snapshots;
+ }
+ s->snapshots = new_snapshot_list;
+ s->snapshots[s->nb_snapshots++] = *sn;
+
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ g_free(s->snapshots);
+ s->snapshots = old_snapshot_list;
+ goto fail;
+ }
+
+ g_free(old_snapshot_list);
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+
+fail:
+ g_free(sn->id_str);
+ g_free(sn->name);
+ g_free(l1_table);
+
+ return ret;
+}
+
+/* copy the snapshot 'snapshot_name' into the current disk image */
+int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ int i, snapshot_index;
+ int cur_l1_bytes, sn_l1_bytes;
+ int ret;
+ uint64_t *sn_l1_table = NULL;
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = &s->snapshots[snapshot_index];
+
+ if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
+ error_report("qcow2: Loading snapshots with different disk "
+ "size is not implemented");
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ /*
+ * Make sure that the current L1 table is big enough to contain the whole
+ * L1 table of the snapshot. If the snapshot L1 table is smaller, the
+ * current one must be padded with zeros.
+ */
+ ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ cur_l1_bytes = s->l1_size * sizeof(uint64_t);
+ sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
+
+ /*
+ * Copy the snapshot L1 table to the current L1 table.
+ *
+ * Before overwriting the old current L1 table on disk, make sure to
+ * increase all refcounts for the clusters referenced by the new one.
+ * Decrease the refcounts of clusters referenced by the old one only when
+ * the L1 table is overwritten.
+ */
+ sn_l1_table = g_malloc0(cur_l1_bytes);
+
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
+ sn->l1_size, 1);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+ cur_l1_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /*
+ * Decrease refcount of clusters of current L1 table.
+ *
+ * At this point, the in-memory s->l1_table points to the old L1 table,
+ * whereas on disk we already have the new one.
+ *
+ * qcow2_update_snapshot_refcount special cases the current L1 table to use
+ * the in-memory data instead of really using the offset to load a new one,
+ * which is why this works.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
+ s->l1_size, -1);
+
+ /*
+ * Now update the in-memory L1 table to be in sync with the on-disk one. We
+ * need to do this even if updating refcounts failed.
+ */
+ for(i = 0;i < s->l1_size; i++) {
+ s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
+ }
+
+ if (ret < 0) {
+ goto fail;
+ }
+
+ g_free(sn_l1_table);
+ sn_l1_table = NULL;
+
+ /*
+ * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
+ * when we decreased the refcount of the old snapshot).
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+
+fail:
+ g_free(sn_l1_table);
+ return ret;
+}
+
+int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot sn;
+ int snapshot_index, ret;
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = s->snapshots[snapshot_index];
+
+ /* Remove it from the snapshot list */
+ memmove(s->snapshots + snapshot_index,
+ s->snapshots + snapshot_index + 1,
+ (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
+ s->nb_snapshots--;
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * The snapshot is now unused, clean up. If we fail after this point, we
+ * won't recover but just leak clusters.
+ */
+ g_free(sn.id_str);
+ g_free(sn.name);
+
+ /*
+ * Now decrease the refcounts of clusters referenced by the snapshot and
+ * free the L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
+ sn.l1_size, -1);
+ if (ret < 0) {
+ return ret;
+ }
+ qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_SNAPSHOT);
+
+ /* must update the copied flag on the current cluster offsets */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+}
+
+int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
+{
+ BDRVQcowState *s = bs->opaque;
+ QEMUSnapshotInfo *sn_tab, *sn_info;
+ QCowSnapshot *sn;
+ int i;
+
+ if (!s->nb_snapshots) {
+ *psn_tab = NULL;
+ return s->nb_snapshots;
+ }
+
+ sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn_info = sn_tab + i;
+ sn = s->snapshots + i;
+ pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
+ sn->id_str);
+ pstrcpy(sn_info->name, sizeof(sn_info->name),
+ sn->name);
+ sn_info->vm_state_size = sn->vm_state_size;
+ sn_info->date_sec = sn->date_sec;
+ sn_info->date_nsec = sn->date_nsec;
+ sn_info->vm_clock_nsec = sn->vm_clock_nsec;
+ }
+ *psn_tab = sn_tab;
+ return s->nb_snapshots;
+}
+
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
+{
+ int i, snapshot_index;
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ uint64_t *new_l1_table;
+ int new_l1_bytes;
+ int ret;
+
+ assert(bs->read_only);
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = &s->snapshots[snapshot_index];
+
+ /* Allocate and read in the snapshot's L1 table */
+ new_l1_bytes = s->l1_size * sizeof(uint64_t);
+ new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
+
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+ if (ret < 0) {
+ g_free(new_l1_table);
+ return ret;
+ }
+
+ /* Switch the L1 table */
+ g_free(s->l1_table);
+
+ s->l1_size = sn->l1_size;
+ s->l1_table_offset = sn->l1_table_offset;
+ s->l1_table = new_l1_table;
+
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+
+ return 0;
+}
diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c
new file mode 100644
index 000000000..0eceefe2c
--- /dev/null
+++ b/contrib/qemu/block/qcow2.c
@@ -0,0 +1,1825 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include <zlib.h>
+#include "qemu/aes.h"
+#include "block/qcow2.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qbool.h"
+#include "trace.h"
+
+/*
+ Differences with QCOW:
+
+ - Support for multiple incremental snapshots.
+ - Memory management by reference counts.
+ - Clusters which have a reference count of one have the bit
+ QCOW_OFLAG_COPIED to optimize write performance.
+ - Size of compressed clusters is stored in sectors to reduce bit usage
+ in the cluster offsets.
+ - Support for storing additional data (such as the VM state) in the
+ snapshots.
+ - If a backing store is used, the cluster size is not constrained
+ (could be backported to QCOW).
+ - L2 tables always have a size of one cluster.
+*/
+
+
+typedef struct {
+ uint32_t magic;
+ uint32_t len;
+} QCowExtension;
+
+#define QCOW2_EXT_MAGIC_END 0
+#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
+
+static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const QCowHeader *cow_header = (const void *)buf;
+
+ if (buf_size >= sizeof(QCowHeader) &&
+ be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
+ be32_to_cpu(cow_header->version) >= 2)
+ return 100;
+ else
+ return 0;
+}
+
+
+/*
+ * read qcow2 extension and fill bs
+ * start reading from start_offset
+ * finish reading upon magic of value 0 or when end_offset reached
+ * unknown magic is skipped (future extensions this version knows nothing about)
+ * return 0 upon success, non-0 otherwise
+ */
+static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+ uint64_t end_offset, void **p_feature_table)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowExtension ext;
+ uint64_t offset;
+ int ret;
+
+#ifdef DEBUG_EXT
+ printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
+#endif
+ offset = start_offset;
+ while (offset < end_offset) {
+
+#ifdef DEBUG_EXT
+ /* Sanity check */
+ if (offset > s->cluster_size)
+ printf("qcow2_read_extension: suspicious offset %lu\n", offset);
+
+ printf("attempting to read extended header in offset %lu\n", offset);
+#endif
+
+ if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
+ fprintf(stderr, "qcow2_read_extension: ERROR: "
+ "pread fail from offset %" PRIu64 "\n",
+ offset);
+ return 1;
+ }
+ be32_to_cpus(&ext.magic);
+ be32_to_cpus(&ext.len);
+ offset += sizeof(ext);
+#ifdef DEBUG_EXT
+ printf("ext.magic = 0x%x\n", ext.magic);
+#endif
+ if (ext.len > end_offset - offset) {
+ error_report("Header extension too large");
+ return -EINVAL;
+ }
+
+ switch (ext.magic) {
+ case QCOW2_EXT_MAGIC_END:
+ return 0;
+
+ case QCOW2_EXT_MAGIC_BACKING_FORMAT:
+ if (ext.len >= sizeof(bs->backing_format)) {
+ fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
+ " (>=%zu)\n",
+ ext.len, sizeof(bs->backing_format));
+ return 2;
+ }
+ if (bdrv_pread(bs->file, offset , bs->backing_format,
+ ext.len) != ext.len)
+ return 3;
+ bs->backing_format[ext.len] = '\0';
+#ifdef DEBUG_EXT
+ printf("Qcow2: Got format extension %s\n", bs->backing_format);
+#endif
+ break;
+
+ case QCOW2_EXT_MAGIC_FEATURE_TABLE:
+ if (p_feature_table != NULL) {
+ void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
+ ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+ if (ret < 0) {
+ return ret;
+ }
+
+ *p_feature_table = feature_table;
+ }
+ break;
+
+ default:
+ /* unknown magic - save it in case we need to rewrite the header */
+ {
+ Qcow2UnknownHeaderExtension *uext;
+
+ uext = g_malloc0(sizeof(*uext) + ext.len);
+ uext->magic = ext.magic;
+ uext->len = ext.len;
+ QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
+
+ ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ break;
+ }
+
+ offset += ((ext.len + 7) & ~7);
+ }
+
+ return 0;
+}
+
+static void cleanup_unknown_header_ext(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2UnknownHeaderExtension *uext, *next;
+
+ QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
+ QLIST_REMOVE(uext, next);
+ g_free(uext);
+ }
+}
+
+static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
+ const char *fmt, ...)
+{
+ char msg[64];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "qcow2", msg);
+}
+
+static void report_unsupported_feature(BlockDriverState *bs,
+ Qcow2Feature *table, uint64_t mask)
+{
+ while (table && table->name[0] != '\0') {
+ if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
+ if (mask & (1 << table->bit)) {
+ report_unsupported(bs, "%.46s",table->name);
+ mask &= ~(1 << table->bit);
+ }
+ }
+ table++;
+ }
+
+ if (mask) {
+ report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
+ }
+}
+
+/*
+ * Sets the dirty bit and flushes afterwards if necessary.
+ *
+ * The incompatible_features bit is only set if the image file header was
+ * updated successfully. Therefore it is not required to check the return
+ * value of this function.
+ */
+int qcow2_mark_dirty(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t val;
+ int ret;
+
+ assert(s->qcow_version >= 3);
+
+ if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+ return 0; /* already dirty */
+ }
+
+ val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
+ ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+ &val, sizeof(val));
+ if (ret < 0) {
+ return ret;
+ }
+ ret = bdrv_flush(bs->file);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Only treat image as dirty if the header was updated successfully */
+ s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
+ return 0;
+}
+
+/*
+ * Clears the dirty bit and flushes before if necessary. Only call this
+ * function when there are no pending requests; it does not guard against
+ * concurrent requests dirtying the image.
+ */
+static int qcow2_mark_clean(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+ int ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
+ return qcow2_update_header(bs);
+ }
+ return 0;
+}
+
+static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ int ret = qcow2_check_refcounts(bs, result, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (fix && result->check_errors == 0 && result->corruptions == 0) {
+ return qcow2_mark_clean(bs);
+ }
+ return ret;
+}
+
+static QemuOptsList qcow2_runtime_opts = {
+ .name = "qcow2",
+ .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
+ .desc = {
+ {
+ .name = "lazy_refcounts",
+ .type = QEMU_OPT_BOOL,
+ .help = "Postpone refcount updates",
+ },
+ {
+ .name = QCOW2_OPT_DISCARD_REQUEST,
+ .type = QEMU_OPT_BOOL,
+ .help = "Pass guest discard requests to the layer below",
+ },
+ {
+ .name = QCOW2_OPT_DISCARD_SNAPSHOT,
+ .type = QEMU_OPT_BOOL,
+ .help = "Generate discard requests when snapshot related space "
+ "is freed",
+ },
+ {
+ .name = QCOW2_OPT_DISCARD_OTHER,
+ .type = QEMU_OPT_BOOL,
+ .help = "Generate discard requests when other clusters are freed",
+ },
+ { /* end of list */ }
+ },
+};
+
+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ int len, i, ret = 0;
+ QCowHeader header;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ uint64_t ext_end;
+ uint64_t l1_vm_state_index;
+
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto fail;
+ }
+ be32_to_cpus(&header.magic);
+ be32_to_cpus(&header.version);
+ be64_to_cpus(&header.backing_file_offset);
+ be32_to_cpus(&header.backing_file_size);
+ be64_to_cpus(&header.size);
+ be32_to_cpus(&header.cluster_bits);
+ be32_to_cpus(&header.crypt_method);
+ be64_to_cpus(&header.l1_table_offset);
+ be32_to_cpus(&header.l1_size);
+ be64_to_cpus(&header.refcount_table_offset);
+ be32_to_cpus(&header.refcount_table_clusters);
+ be64_to_cpus(&header.snapshots_offset);
+ be32_to_cpus(&header.nb_snapshots);
+
+ if (header.magic != QCOW_MAGIC) {
+ ret = -EMEDIUMTYPE;
+ goto fail;
+ }
+ if (header.version < 2 || header.version > 3) {
+ report_unsupported(bs, "QCOW version %d", header.version);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ s->qcow_version = header.version;
+
+ /* Initialise version 3 header fields */
+ if (header.version == 2) {
+ header.incompatible_features = 0;
+ header.compatible_features = 0;
+ header.autoclear_features = 0;
+ header.refcount_order = 4;
+ header.header_length = 72;
+ } else {
+ be64_to_cpus(&header.incompatible_features);
+ be64_to_cpus(&header.compatible_features);
+ be64_to_cpus(&header.autoclear_features);
+ be32_to_cpus(&header.refcount_order);
+ be32_to_cpus(&header.header_length);
+ }
+
+ if (header.header_length > sizeof(header)) {
+ s->unknown_header_fields_size = header.header_length - sizeof(header);
+ s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
+ ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+ s->unknown_header_fields_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ if (header.backing_file_offset) {
+ ext_end = header.backing_file_offset;
+ } else {
+ ext_end = 1 << header.cluster_bits;
+ }
+
+ /* Handle feature bits */
+ s->incompatible_features = header.incompatible_features;
+ s->compatible_features = header.compatible_features;
+ s->autoclear_features = header.autoclear_features;
+
+ if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
+ void *feature_table = NULL;
+ qcow2_read_extensions(bs, header.header_length, ext_end,
+ &feature_table);
+ report_unsupported_feature(bs, feature_table,
+ s->incompatible_features &
+ ~QCOW2_INCOMPAT_MASK);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ /* Check support for various header values */
+ if (header.refcount_order != 4) {
+ report_unsupported(bs, "%d bit reference counts",
+ 1 << header.refcount_order);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ if (header.cluster_bits < MIN_CLUSTER_BITS ||
+ header.cluster_bits > MAX_CLUSTER_BITS) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (header.crypt_method > QCOW_CRYPT_AES) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->crypt_method_header = header.crypt_method;
+ if (s->crypt_method_header) {
+ bs->encrypted = 1;
+ }
+ s->cluster_bits = header.cluster_bits;
+ s->cluster_size = 1 << s->cluster_bits;
+ s->cluster_sectors = 1 << (s->cluster_bits - 9);
+ s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
+ s->l2_size = 1 << s->l2_bits;
+ bs->total_sectors = header.size / 512;
+ s->csize_shift = (62 - (s->cluster_bits - 8));
+ s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
+ s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
+ s->refcount_table_offset = header.refcount_table_offset;
+ s->refcount_table_size =
+ header.refcount_table_clusters << (s->cluster_bits - 3);
+
+ s->snapshots_offset = header.snapshots_offset;
+ s->nb_snapshots = header.nb_snapshots;
+
+ /* read the level 1 table */
+ s->l1_size = header.l1_size;
+
+ l1_vm_state_index = size_to_l1(s, header.size);
+ if (l1_vm_state_index > INT_MAX) {
+ ret = -EFBIG;
+ goto fail;
+ }
+ s->l1_vm_state_index = l1_vm_state_index;
+
+ /* the L1 table must contain at least enough entries to cover
+ header.size bytes */
+ if (s->l1_size < s->l1_vm_state_index) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->l1_table_offset = header.l1_table_offset;
+ if (s->l1_size > 0) {
+ s->l1_table = g_malloc0(
+ align_offset(s->l1_size * sizeof(uint64_t), 512));
+ ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+ }
+
+ /* alloc L2 table/refcount block cache */
+ s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
+ s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
+
+ s->cluster_cache = g_malloc(s->cluster_size);
+ /* one more sector for decompressed data alignment */
+ s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+ + 512);
+ s->cluster_cache_offset = -1;
+ s->flags = flags;
+
+ ret = qcow2_refcount_init(bs);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ QLIST_INIT(&s->cluster_allocs);
+ QTAILQ_INIT(&s->discards);
+
+ /* read qcow2 extensions */
+ if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* read the backing file name */
+ if (header.backing_file_offset != 0) {
+ len = header.backing_file_size;
+ if (len > 1023) {
+ len = 1023;
+ }
+ ret = bdrv_pread(bs->file, header.backing_file_offset,
+ bs->backing_file, len);
+ if (ret < 0) {
+ goto fail;
+ }
+ bs->backing_file[len] = '\0';
+ }
+
+ ret = qcow2_read_snapshots(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Clear unknown autoclear feature bits */
+ if (!bs->read_only && s->autoclear_features != 0) {
+ s->autoclear_features = 0;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ /* Initialise locks */
+ qemu_co_mutex_init(&s->lock);
+
+ /* Repair image if dirty */
+ if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
+ (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
+ BdrvCheckResult result = {0};
+
+ ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ /* Enable lazy_refcounts according to image and command line options */
+ opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
+ (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+
+ s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
+ s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
+ s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
+ flags & BDRV_O_UNMAP);
+ s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
+ s->discard_passthrough[QCOW2_DISCARD_OTHER] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
+
+ qemu_opts_del(opts);
+
+ if (s->use_lazy_refcounts && s->qcow_version < 3) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
+ "a qcow2 image with at least qemu 1.1 compatibility level");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return ret;
+
+ fail:
+ g_free(s->unknown_header_fields);
+ cleanup_unknown_header_ext(bs);
+ qcow2_free_snapshots(bs);
+ qcow2_refcount_close(bs);
+ g_free(s->l1_table);
+ if (s->l2_table_cache) {
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ }
+ g_free(s->cluster_cache);
+ qemu_vfree(s->cluster_data);
+ return ret;
+}
+
+static int qcow2_set_key(BlockDriverState *bs, const char *key)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint8_t keybuf[16];
+ int len, i;
+
+ memset(keybuf, 0, 16);
+ len = strlen(key);
+ if (len > 16)
+ len = 16;
+ /* XXX: we could compress the chars to 7 bits to increase
+ entropy */
+ for(i = 0;i < len;i++) {
+ keybuf[i] = key[i];
+ }
+ s->crypt_method = s->crypt_method_header;
+
+ if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ return -1;
+ if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+ return -1;
+#if 0
+ /* test */
+ {
+ uint8_t in[16];
+ uint8_t out[16];
+ uint8_t tmp[16];
+ for(i=0;i<16;i++)
+ in[i] = i;
+ AES_encrypt(in, tmp, &s->aes_encrypt_key);
+ AES_decrypt(tmp, out, &s->aes_decrypt_key);
+ for(i = 0; i < 16; i++)
+ printf(" %02x", tmp[i]);
+ printf("\n");
+ for(i = 0; i < 16; i++)
+ printf(" %02x", out[i]);
+ printf("\n");
+ }
+#endif
+ return 0;
+}
+
+/* We have nothing to do for QCOW2 reopen; stubs just return
+ * success */
+static int qcow2_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t cluster_offset;
+ int ret;
+
+ *pnum = nb_sectors;
+ /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
+ * can't pass them on today */
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ *pnum = 0;
+ }
+
+ return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
+}
+
+/* handle reading after the end of the backing file */
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors)
+{
+ int n1;
+ if ((sector_num + nb_sectors) <= bs->total_sectors)
+ return nb_sectors;
+ if (sector_num >= bs->total_sectors)
+ n1 = 0;
+ else
+ n1 = bs->total_sectors - sector_num;
+
+ qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+
+ return n1;
+}
+
+static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster, n1;
+ int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset = 0;
+ uint64_t bytes_done = 0;
+ QEMUIOVector hd_qiov;
+ uint8_t *cluster_data = NULL;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (remaining_sectors != 0) {
+
+ /* prepare next request */
+ cur_nr_sectors = remaining_sectors;
+ if (s->crypt_method) {
+ cur_nr_sectors = MIN(cur_nr_sectors,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ }
+
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9,
+ &cur_nr_sectors, &cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ switch (ret) {
+ case QCOW2_CLUSTER_UNALLOCATED:
+
+ if (bs->backing_hd) {
+ /* read from the base image */
+ n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+ sector_num, cur_nr_sectors);
+ if (n1 > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n1, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ } else {
+ /* Note: in this case, no need to wait */
+ qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* add AIO support for compressed blocks ? */
+ ret = qcow2_decompress_cluster(bs, cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ qemu_iovec_from_buf(&hd_qiov, 0,
+ s->cluster_cache + index_in_cluster * 512,
+ 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_NORMAL:
+ if ((cluster_offset & 511) != 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ if (s->crypt_method) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ */
+ if (!cluster_data) {
+ cluster_data =
+ qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
+
+ assert(cur_nr_sectors <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ 512 * cur_nr_sectors);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (s->crypt_method) {
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
+ qemu_iovec_from_buf(qiov, bytes_done,
+ cluster_data, 512 * cur_nr_sectors);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ ret = -EIO;
+ goto fail;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+ ret = 0;
+
+fail:
+ qemu_co_mutex_unlock(&s->lock);
+
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cluster_data);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ int remaining_sectors,
+ QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ int n_end;
+ int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset;
+ QEMUIOVector hd_qiov;
+ uint64_t bytes_done = 0;
+ uint8_t *cluster_data = NULL;
+ QCowL2Meta *l2meta = NULL;
+
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
+ remaining_sectors);
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (remaining_sectors != 0) {
+
+ l2meta = NULL;
+
+ trace_qcow2_writev_start_part(qemu_coroutine_self());
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n_end = index_in_cluster + remaining_sectors;
+ if (s->crypt_method &&
+ n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
+ n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+ }
+
+ ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
+ index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert((cluster_offset & 511) == 0);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
+ s->cluster_size);
+ }
+
+ assert(hd_qiov.size <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
+
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ cur_nr_sectors * 512);
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ (cluster_offset >> 9) + index_in_cluster);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ while (l2meta != NULL) {
+ QCowL2Meta *next;
+
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Take the request off the list of running requests */
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+ next = l2meta->next;
+ g_free(l2meta);
+ l2meta = next;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+ }
+ ret = 0;
+
+fail:
+ qemu_co_mutex_unlock(&s->lock);
+
+ while (l2meta != NULL) {
+ QCowL2Meta *next;
+
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+ next = l2meta->next;
+ g_free(l2meta);
+ l2meta = next;
+ }
+
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cluster_data);
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
+
+ return ret;
+}
+
+static void qcow2_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ g_free(s->l1_table);
+
+ qcow2_cache_flush(bs, s->l2_table_cache);
+ qcow2_cache_flush(bs, s->refcount_block_cache);
+
+ qcow2_mark_clean(bs);
+
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ qcow2_cache_destroy(bs, s->refcount_block_cache);
+
+ g_free(s->unknown_header_fields);
+ cleanup_unknown_header_ext(bs);
+
+ g_free(s->cluster_cache);
+ qemu_vfree(s->cluster_data);
+ qcow2_refcount_close(bs);
+ qcow2_free_snapshots(bs);
+}
+
+static void qcow2_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int flags = s->flags;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ uint32_t crypt_method = 0;
+ QDict *options;
+
+ /*
+ * Backing files are read-only, which makes all of their metadata immutable;
+ * that means we don't have to worry about reopening them here.
+ */
+
+ if (s->crypt_method) {
+ crypt_method = s->crypt_method;
+ memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
+ memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
+ }
+
+ qcow2_close(bs);
+
+ options = qdict_new();
+ qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
+ qbool_from_int(s->use_lazy_refcounts));
+
+ memset(s, 0, sizeof(BDRVQcowState));
+ qcow2_open(bs, options, flags);
+
+ QDECREF(options);
+
+ if (crypt_method) {
+ s->crypt_method = crypt_method;
+ memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
+ memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
+ }
+}
+
+static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
+ size_t len, size_t buflen)
+{
+ QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
+ size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
+
+ if (buflen < ext_len) {
+ return -ENOSPC;
+ }
+
+ *ext_backing_fmt = (QCowExtension) {
+ .magic = cpu_to_be32(magic),
+ .len = cpu_to_be32(len),
+ };
+ memcpy(buf + sizeof(QCowExtension), s, len);
+
+ return ext_len;
+}
+
+/*
+ * Updates the qcow2 header, including the variable length parts of it, i.e.
+ * the backing file name and all extensions. qcow2 was not designed to allow
+ * such changes, so if we run out of space (we can only use the first cluster)
+ * this function may fail.
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int qcow2_update_header(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowHeader *header;
+ char *buf;
+ size_t buflen = s->cluster_size;
+ int ret;
+ uint64_t total_size;
+ uint32_t refcount_table_clusters;
+ size_t header_length;
+ Qcow2UnknownHeaderExtension *uext;
+
+ buf = qemu_blockalign(bs, buflen);
+
+ /* Header structure */
+ header = (QCowHeader*) buf;
+
+ if (buflen < sizeof(*header)) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ header_length = sizeof(*header) + s->unknown_header_fields_size;
+ total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
+
+ *header = (QCowHeader) {
+ /* Version 2 fields */
+ .magic = cpu_to_be32(QCOW_MAGIC),
+ .version = cpu_to_be32(s->qcow_version),
+ .backing_file_offset = 0,
+ .backing_file_size = 0,
+ .cluster_bits = cpu_to_be32(s->cluster_bits),
+ .size = cpu_to_be64(total_size),
+ .crypt_method = cpu_to_be32(s->crypt_method_header),
+ .l1_size = cpu_to_be32(s->l1_size),
+ .l1_table_offset = cpu_to_be64(s->l1_table_offset),
+ .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
+ .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
+ .nb_snapshots = cpu_to_be32(s->nb_snapshots),
+ .snapshots_offset = cpu_to_be64(s->snapshots_offset),
+
+ /* Version 3 fields */
+ .incompatible_features = cpu_to_be64(s->incompatible_features),
+ .compatible_features = cpu_to_be64(s->compatible_features),
+ .autoclear_features = cpu_to_be64(s->autoclear_features),
+ .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
+ .header_length = cpu_to_be32(header_length),
+ };
+
+ /* For older versions, write a shorter header */
+ switch (s->qcow_version) {
+ case 2:
+ ret = offsetof(QCowHeader, incompatible_features);
+ break;
+ case 3:
+ ret = sizeof(*header);
+ break;
+ default:
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ memset(buf, 0, buflen);
+
+ /* Preserve any unknown field in the header */
+ if (s->unknown_header_fields_size) {
+ if (buflen < s->unknown_header_fields_size) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
+ buf += s->unknown_header_fields_size;
+ buflen -= s->unknown_header_fields_size;
+ }
+
+ /* Backing file format header extension */
+ if (*bs->backing_format) {
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
+ bs->backing_format, strlen(bs->backing_format),
+ buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ }
+
+ /* Feature table */
+ Qcow2Feature features[] = {
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
+ .name = "dirty bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+ .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+ .name = "lazy refcounts",
+ },
+ };
+
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
+
+ /* Keep unknown header extensions */
+ QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
+ ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ }
+
+ /* End of header extensions */
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+
+ /* Backing file name */
+ if (*bs->backing_file) {
+ size_t backing_file_len = strlen(bs->backing_file);
+
+ if (buflen < backing_file_len) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ /* strncpy is fine here: the backing file name stored in the header need not
+ * be NUL-terminated. */
+ strncpy(buf, bs->backing_file, buflen);
+
+ header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
+ header->backing_file_size = cpu_to_be32(backing_file_len);
+ }
+
+ /* Write the new header */
+ ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ qemu_vfree(header);
+ return ret;
+}
+
+static int qcow2_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt)
+{
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+
+ return qcow2_update_header(bs);
+}
+
+static int preallocate(BlockDriverState *bs)
+{
+ uint64_t nb_sectors;
+ uint64_t offset;
+ uint64_t host_offset = 0;
+ int num;
+ int ret;
+ QCowL2Meta *meta;
+
+ nb_sectors = bdrv_getlength(bs) >> 9;
+ offset = 0;
+
+ while (nb_sectors) {
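+ /* num is a plain int; cap each pass at INT_MAX bytes' worth of sectors. */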
+ num = MIN(nb_sectors, INT_MAX >> 9);
+ ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+ &host_offset, &meta);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = qcow2_alloc_cluster_link_l2(bs, meta);
+ if (ret < 0) {
+ qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
+ QCOW2_DISCARD_NEVER);
+ return ret;
+ }
+
+ /* There are no dependent requests, but we need to remove our request
+ * from the list of in-flight requests */
+ if (meta != NULL) {
+ QLIST_REMOVE(meta, next_in_flight);
+ }
+
+ /* TODO Preallocate data if requested */
+
+ nb_sectors -= num;
+ offset += num << 9;
+ }
+
+ /*
+ * It is expected that the image file is large enough to actually contain
+ * all of the allocated clusters (otherwise we get failing reads after
+ * EOF). Extend the image to the last allocated sector.
+ */
+ if (host_offset != 0) {
+ uint8_t buf[512];
+ memset(buf, 0, 512);
+ ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int qcow2_create2(const char *filename, int64_t total_size,
+ const char *backing_file, const char *backing_format,
+ int flags, size_t cluster_size, int prealloc,
+ QEMUOptionParameter *options, int version)
+{
+ /* Calculate cluster_bits */
+ int cluster_bits;
+ cluster_bits = ffs(cluster_size) - 1;
+ if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
+ (1 << cluster_bits) != cluster_size)
+ {
+ error_report(
+ "Cluster size must be a power of two between %d and %dk",
+ 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
+ return -EINVAL;
+ }
+
+ /*
+ * Open the image file and write a minimal qcow2 header.
+ *
+ * We keep things simple and start with a zero-sized image. We also
+ * do without refcount blocks or an L1 table for now. We'll fix the
+ * inconsistency later.
+ *
+ * We do need a refcount table because growing the refcount table means
+ * allocating two new refcount blocks - the second of which would be at
+ * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
+ * size for any qcow2 image.
+ */
+ BlockDriverState* bs;
+ QCowHeader header;
+ uint8_t* refcount_table;
+ int ret;
+
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Write the header */
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(version);
+ header.cluster_bits = cpu_to_be32(cluster_bits);
+ header.size = cpu_to_be64(0);
+ header.l1_table_offset = cpu_to_be64(0);
+ header.l1_size = cpu_to_be32(0);
+ header.refcount_table_offset = cpu_to_be64(cluster_size);
+ header.refcount_table_clusters = cpu_to_be32(1);
+ header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
+ header.header_length = cpu_to_be32(sizeof(header));
+
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
+ header.compatible_features |=
+ cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
+ }
+
+ ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Write an empty refcount table */
+ refcount_table = g_malloc0(cluster_size);
+ ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
+ g_free(refcount_table);
+
+ if (ret < 0) {
+ goto out;
+ }
+
+ bdrv_close(bs);
+
+ /*
+ * And now open the image and make it consistent first (i.e. increase the
+ * refcount of the cluster that is occupied by the header and the refcount
+ * table)
+ */
+ BlockDriver* drv = bdrv_find_format("qcow2");
+ assert(drv != NULL);
+ ret = bdrv_open(bs, filename, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
+ if (ret < 0) {
+ goto out;
+
+ } else if (ret != 0) {
+ error_report("Huh, first cluster in empty image is already in use?");
+ abort();
+ }
+
+ /* Okay, now that we have a valid image, let's give it the right size */
+ ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Want a backing file? There you go. */
+ if (backing_file) {
+ ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+
+ /* And if we're supposed to preallocate metadata, do that now */
+ if (prealloc) {
+ BDRVQcowState *s = bs->opaque;
+ qemu_co_mutex_lock(&s->lock);
+ ret = preallocate(bs);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ bdrv_delete(bs);
+ return ret;
+}
+
+static int qcow2_create(const char *filename, QEMUOptionParameter *options)
+{
+ const char *backing_file = NULL;
+ const char *backing_fmt = NULL;
+ uint64_t sectors = 0;
+ int flags = 0;
+ size_t cluster_size = DEFAULT_CLUSTER_SIZE;
+ int prealloc = 0;
+ int version = 2;
+
+ /* Read out options */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
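+ /* The size option is given in bytes; convert it to 512-byte sectors. */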
+ sectors = options->value.n / 512;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+ backing_fmt = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+ flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+ } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+ if (options->value.n) {
+ cluster_size = options->value.n;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+ if (!options->value.s || !strcmp(options->value.s, "off")) {
+ prealloc = 0;
+ } else if (!strcmp(options->value.s, "metadata")) {
+ prealloc = 1;
+ } else {
+ fprintf(stderr, "Invalid preallocation mode: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
+ if (!options->value.s || !strcmp(options->value.s, "0.10")) {
+ version = 2;
+ } else if (!strcmp(options->value.s, "1.1")) {
+ version = 3;
+ } else {
+ fprintf(stderr, "Invalid compatibility level: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
+ flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
+ }
+ options++;
+ }
+
+ if (backing_file && prealloc) {
+ fprintf(stderr, "Backing file and preallocation cannot be used at "
+ "the same time\n");
+ return -EINVAL;
+ }
+
+ if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
+ fprintf(stderr, "Lazy refcounts only supported with compatibility "
+ "level 1.1 and above (use compat=1.1 or greater)\n");
+ return -EINVAL;
+ }
+
+ return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
+ cluster_size, prealloc, options, version);
+}
+
+static int qcow2_make_empty(BlockDriverState *bs)
+{
+#if 0
+ /* XXX: not correct */
+ BDRVQcowState *s = bs->opaque;
+ uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+ int ret;
+
+ memset(s->l1_table, 0, l1_length);
+ if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
+ return -1;
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ if (ret < 0)
+ return ret;
+
+ l2_cache_reset(bs);
+#endif
+ return 0;
+}
+
+static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ int ret;
+ BDRVQcowState *s = bs->opaque;
+
+ /* Misaligned zero writes are not handled here; returning -ENOTSUP lets the
+ * generic block layer emulate them with regular writes. */
+ if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
+ return -ENOTSUP;
+ }
+
+ /* Whatever is left can use real zero clusters */
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors);
+ qemu_co_mutex_unlock(&s->lock);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ int ret;
+ BDRVQcowState *s = bs->opaque;
+
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors);
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+}
+
+static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t new_l1_size;
+ int ret;
+
+ if (offset & 511) {
+ error_report("The new size must be a multiple of 512");
+ return -EINVAL;
+ }
+
+ /* cannot proceed if image has snapshots */
+ if (s->nb_snapshots) {
+ error_report("Can't resize an image which has snapshots");
+ return -ENOTSUP;
+ }
+
+ /* shrinking is currently not supported */
+ if (offset < bs->total_sectors * 512) {
+ error_report("qcow2 doesn't support shrinking images yet");
+ return -ENOTSUP;
+ }
+
+ new_l1_size = size_to_l1(s, offset);
+ ret = qcow2_grow_l1_table(bs, new_l1_size, true);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* write updated header.size */
+ offset = cpu_to_be64(offset);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+ &offset, sizeof(uint64_t));
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->l1_vm_state_index = new_l1_size;
+ return 0;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+ tables to avoid losing bytes in alignment */
+static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ z_stream strm;
+ int ret, out_len;
+ uint8_t *out_buf;
+ uint64_t cluster_offset;
+
+ if (nb_sectors == 0) {
+ /* align end of file to a sector boundary to ease reading with
+ sector-based I/Os */
+ cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = (cluster_offset + 511) & ~511;
+ bdrv_truncate(bs->file, cluster_offset);
+ return 0;
+ }
+
+ if (nb_sectors != s->cluster_sectors) {
+ ret = -EINVAL;
+
+ /* Zero-pad last write if image size is not cluster aligned */
+ if (sector_num + nb_sectors == bs->total_sectors &&
+ nb_sectors < s->cluster_sectors) {
+ uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
+ memset(pad_buf, 0, s->cluster_size);
+ memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
+ ret = qcow2_write_compressed(bs, sector_num,
+ pad_buf, s->cluster_sectors);
+ qemu_vfree(pad_buf);
+ }
+ return ret;
+ }
+
+ out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+ /* default compression level, small window, raw deflate (no zlib header) */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -12,
+ 9, Z_DEFAULT_STRATEGY);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ strm.avail_in = s->cluster_size;
+ strm.next_in = (uint8_t *)buf;
+ strm.avail_out = s->cluster_size;
+ strm.next_out = out_buf;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret != Z_STREAM_END && ret != Z_OK) {
+ deflateEnd(&strm);
+ ret = -EINVAL;
+ goto fail;
+ }
+ out_len = strm.next_out - out_buf;
+
+ deflateEnd(&strm);
+
+ if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+ /* could not compress: write normal cluster */
+ ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
+ sector_num << 9, out_len);
+ if (!cluster_offset) {
+ ret = -EIO;
+ goto fail;
+ }
+ cluster_offset &= s->cluster_offset_mask;
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+fail:
+ g_free(out_buf);
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+ }
+
+ if (qcow2_need_accurate_refcounts(s)) {
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+ }
+ }
+ qemu_co_mutex_unlock(&s->lock);
+
+ return 0;
+}
+
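+/* The VM state (savevm data) is stored beyond the guest-visible disk,
+ * starting at the guest offset mapped by L1 entry l1_vm_state_index. */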
+static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
+{
+ return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
+}
+
+static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQcowState *s = bs->opaque;
+ bdi->cluster_size = s->cluster_size;
+ bdi->vm_state_offset = qcow2_vm_state_offset(s);
+ return 0;
+}
+
+#if 0
+static void dump_refcounts(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t nb_clusters, k, k1, size;
+ int refcount;
+
+ size = bdrv_getlength(bs->file);
+ nb_clusters = size_to_clusters(s, size);
+ for(k = 0; k < nb_clusters;) {
+ k1 = k;
+ refcount = get_refcount(bs, k);
+ k++;
+ while (k < nb_clusters && get_refcount(bs, k) == refcount)
+ k++;
+ printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
+ k - k1);
+ }
+}
+#endif
+
+static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
+{
+ BDRVQcowState *s = bs->opaque;
+ int growable = bs->growable;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
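+ /* The write lands beyond the guest-visible disk size, so temporarily mark
+ * the BDS growable to keep the request from being rejected. */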
+ bs->growable = 1;
+ ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
+ bs->growable = growable;
+
+ return ret;
+}
+
+static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int growable = bs->growable;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
+ bs->growable = 1;
+ ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
+ bs->growable = growable;
+
+ return ret;
+}
+
+static QEMUOptionParameter qcow2_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_COMPAT_LEVEL,
+ .type = OPT_STRING,
+ .help = "Compatibility level (0.10 or 1.1)"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = OPT_FLAG,
+ .help = "Encrypt the image"
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = OPT_SIZE,
+ .help = "qcow2 cluster size",
+ .value = { .n = DEFAULT_CLUSTER_SIZE },
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, metadata)"
+ },
+ {
+ .name = BLOCK_OPT_LAZY_REFCOUNTS,
+ .type = OPT_FLAG,
+ .help = "Postpone refcount updates",
+ },
+ { NULL }
+};
+
+static BlockDriver bdrv_qcow2 = {
+ .format_name = "qcow2",
+ .instance_size = sizeof(BDRVQcowState),
+ .bdrv_probe = qcow2_probe,
+ .bdrv_open = qcow2_open,
+ .bdrv_close = qcow2_close,
+ .bdrv_reopen_prepare = qcow2_reopen_prepare,
+ .bdrv_create = qcow2_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_is_allocated = qcow2_co_is_allocated,
+ .bdrv_set_key = qcow2_set_key,
+ .bdrv_make_empty = qcow2_make_empty,
+
+ .bdrv_co_readv = qcow2_co_readv,
+ .bdrv_co_writev = qcow2_co_writev,
+ .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
+
+ .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
+ .bdrv_co_discard = qcow2_co_discard,
+ .bdrv_truncate = qcow2_truncate,
+ .bdrv_write_compressed = qcow2_write_compressed,
+
+ .bdrv_snapshot_create = qcow2_snapshot_create,
+ .bdrv_snapshot_goto = qcow2_snapshot_goto,
+ .bdrv_snapshot_delete = qcow2_snapshot_delete,
+ .bdrv_snapshot_list = qcow2_snapshot_list,
+ .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
+ .bdrv_get_info = qcow2_get_info,
+
+ .bdrv_save_vmstate = qcow2_save_vmstate,
+ .bdrv_load_vmstate = qcow2_load_vmstate,
+
+ .bdrv_change_backing_file = qcow2_change_backing_file,
+
+ .bdrv_invalidate_cache = qcow2_invalidate_cache,
+
+ .create_options = qcow2_create_options,
+ .bdrv_check = qcow2_check,
+};
+
+static void bdrv_qcow2_init(void)
+{
+ bdrv_register(&bdrv_qcow2);
+}
+
+block_init(bdrv_qcow2_init);
diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h
new file mode 100644
index 000000000..3b2d5cda7
--- /dev/null
+++ b/contrib/qemu/block/qcow2.h
@@ -0,0 +1,437 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_QCOW2_H
+#define BLOCK_QCOW2_H
+
+#include "qemu/aes.h"
+#include "block/coroutine.h"
+
+//#define DEBUG_ALLOC
+//#define DEBUG_ALLOC2
+//#define DEBUG_EXT
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+#define QCOW_MAX_CRYPT_CLUSTERS 32
+
+/* indicate that the refcount of the referenced cluster is exactly one. */
+#define QCOW_OFLAG_COPIED (1LL << 63)
+/* indicate that the cluster is compressed (they never have the copied flag) */
+#define QCOW_OFLAG_COMPRESSED (1LL << 62)
+/* The cluster reads as all zeros */
+#define QCOW_OFLAG_ZERO (1LL << 0)
+
+#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+
+#define MIN_CLUSTER_BITS 9
+#define MAX_CLUSTER_BITS 21
+
+#define L2_CACHE_SIZE 16
+
+/* Must be at least 4 to cover all cases of refcount table growth */
+#define REFCOUNT_CACHE_SIZE 4
+
+#define DEFAULT_CLUSTER_SIZE 65536
+
+
+#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
+#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
+#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
+
+typedef struct QCowHeader {
+ uint32_t magic;
+ uint32_t version;
+ uint64_t backing_file_offset;
+ uint32_t backing_file_size;
+ uint32_t cluster_bits;
+ uint64_t size; /* in bytes */
+ uint32_t crypt_method;
+ uint32_t l1_size; /* XXX: save number of clusters instead ? */
+ uint64_t l1_table_offset;
+ uint64_t refcount_table_offset;
+ uint32_t refcount_table_clusters;
+ uint32_t nb_snapshots;
+ uint64_t snapshots_offset;
+
+ /* The following fields are only valid for version >= 3 */
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ uint32_t refcount_order;
+ uint32_t header_length;
+} QCowHeader;
+
+typedef struct QCowSnapshot {
+ uint64_t l1_table_offset;
+ uint32_t l1_size;
+ char *id_str;
+ char *name;
+ uint64_t disk_size;
+ uint64_t vm_state_size;
+ uint32_t date_sec;
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec;
+} QCowSnapshot;
+
+struct Qcow2Cache;
+typedef struct Qcow2Cache Qcow2Cache;
+
+typedef struct Qcow2UnknownHeaderExtension {
+ uint32_t magic;
+ uint32_t len;
+ QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
+ uint8_t data[];
+} Qcow2UnknownHeaderExtension;
+
+enum {
+ QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
+ QCOW2_FEAT_TYPE_COMPATIBLE = 1,
+ QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
+};
+
+/* Incompatible feature bits */
+enum {
+ QCOW2_INCOMPAT_DIRTY_BITNR = 0,
+ QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+
+ QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
+};
+
+/* Compatible feature bits */
+enum {
+ QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
+ QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+
+ QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
+};
+
+enum qcow2_discard_type {
+ QCOW2_DISCARD_NEVER = 0,
+ QCOW2_DISCARD_ALWAYS,
+ QCOW2_DISCARD_REQUEST,
+ QCOW2_DISCARD_SNAPSHOT,
+ QCOW2_DISCARD_OTHER,
+ QCOW2_DISCARD_MAX
+};
+
+typedef struct Qcow2Feature {
+ uint8_t type;
+ uint8_t bit;
+ char name[46];
+} QEMU_PACKED Qcow2Feature;
+
+typedef struct Qcow2DiscardRegion {
+ BlockDriverState *bs;
+ uint64_t offset;
+ uint64_t bytes;
+ QTAILQ_ENTRY(Qcow2DiscardRegion) next;
+} Qcow2DiscardRegion;
+
+typedef struct BDRVQcowState {
+ int cluster_bits;
+ int cluster_size;
+ int cluster_sectors;
+ int l2_bits;
+ int l2_size;
+ int l1_size;
+ int l1_vm_state_index;
+ int csize_shift;
+ int csize_mask;
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset;
+ uint64_t *l1_table;
+
+ Qcow2Cache* l2_table_cache;
+ Qcow2Cache* refcount_block_cache;
+
+ uint8_t *cluster_cache;
+ uint8_t *cluster_data;
+ uint64_t cluster_cache_offset;
+ QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
+
+ uint64_t *refcount_table;
+ uint64_t refcount_table_offset;
+ uint32_t refcount_table_size;
+ int64_t free_cluster_index;
+ int64_t free_byte_offset;
+
+ CoMutex lock;
+
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ uint64_t snapshots_offset;
+ int snapshots_size;
+ int nb_snapshots;
+ QCowSnapshot *snapshots;
+
+ int flags;
+ int qcow_version;
+ bool use_lazy_refcounts;
+
+ bool discard_passthrough[QCOW2_DISCARD_MAX];
+
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ size_t unknown_header_fields_size;
+ void* unknown_header_fields;
+ QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
+ QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
+ bool cache_discards;
+} BDRVQcowState;
+
+/* XXX: use std qcow open function ? */
+typedef struct QCowCreateState {
+ int cluster_size;
+ int cluster_bits;
+ uint16_t *refcount_block;
+ uint64_t *refcount_table;
+ int64_t l1_table_offset;
+ int64_t refcount_table_offset;
+ int64_t refcount_block_offset;
+} QCowCreateState;
+
+struct QCowAIOCB;
+
+typedef struct Qcow2COWRegion {
+ /**
+ * Offset of the COW region in bytes from the start of the first cluster
+ * touched by the request.
+ */
+ uint64_t offset;
+
+ /** Number of sectors to copy */
+ int nb_sectors;
+} Qcow2COWRegion;
+
+/**
+ * Describes an in-flight (part of a) write request that writes to clusters
+ * that are not referenced in their L2 table yet.
+ */
+typedef struct QCowL2Meta
+{
+ /** Guest offset of the first newly allocated cluster */
+ uint64_t offset;
+
+ /** Host offset of the first newly allocated cluster */
+ uint64_t alloc_offset;
+
+ /**
+ * Number of sectors from the start of the first allocated cluster to
+ * the end of the (possibly shortened) request
+ */
+ int nb_available;
+
+ /** Number of newly allocated clusters */
+ int nb_clusters;
+
+ /**
+ * Requests that overlap with this allocation and wait to be restarted
+ * when the allocating request has completed.
+ */
+ CoQueue dependent_requests;
+
+ /**
+ * The COW Region between the start of the first allocated cluster and the
+ * area the guest actually writes to.
+ */
+ Qcow2COWRegion cow_start;
+
+ /**
+ * The COW Region between the area the guest actually writes to and the
+ * end of the last allocated cluster.
+ */
+ Qcow2COWRegion cow_end;
+
+ /** Pointer to next L2Meta of the same write request */
+ struct QCowL2Meta *next;
+
+ QLIST_ENTRY(QCowL2Meta) next_in_flight;
+} QCowL2Meta;
+
+enum {
+ QCOW2_CLUSTER_UNALLOCATED,
+ QCOW2_CLUSTER_NORMAL,
+ QCOW2_CLUSTER_COMPRESSED,
+ QCOW2_CLUSTER_ZERO
+};
+
+#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+
+#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
+
+static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
+{
+ return offset & ~(s->cluster_size - 1);
+}
+
+static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
+{
+ return offset & (s->cluster_size - 1);
+}
+
+static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
+{
+ return (size + (s->cluster_size - 1)) >> s->cluster_bits;
+}
+
+static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
+{
+ int shift = s->cluster_bits + s->l2_bits;
+ return (size + (1ULL << shift) - 1) >> shift;
+}
+
+static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
+{
+ return (offset >> s->cluster_bits) & (s->l2_size - 1);
+}
+
+static inline int64_t align_offset(int64_t offset, int n)
+{
+ offset = (offset + n - 1) & ~(n - 1);
+ return offset;
+}
+
+static inline int qcow2_get_cluster_type(uint64_t l2_entry)
+{
+ if (l2_entry & QCOW_OFLAG_COMPRESSED) {
+ return QCOW2_CLUSTER_COMPRESSED;
+ } else if (l2_entry & QCOW_OFLAG_ZERO) {
+ return QCOW2_CLUSTER_ZERO;
+ } else if (!(l2_entry & L2E_OFFSET_MASK)) {
+ return QCOW2_CLUSTER_UNALLOCATED;
+ } else {
+ return QCOW2_CLUSTER_NORMAL;
+ }
+}
+
+/* Check whether refcounts are eager or lazy */
+static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+{
+ return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
+}
+
+static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
+{
+ return m->offset + m->cow_start.offset;
+}
+
+static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
+{
+ return m->offset + m->cow_end.offset
+ + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
+}
+
+// FIXME Need qcow2_ prefix to global functions
+
+/* qcow2.c functions */
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors);
+
+int qcow2_mark_dirty(BlockDriverState *bs);
+int qcow2_update_header(BlockDriverState *bs);
+
+/* qcow2-refcount.c functions */
+int qcow2_refcount_init(BlockDriverState *bs);
+void qcow2_refcount_close(BlockDriverState *bs);
+
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters);
+int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
+void qcow2_free_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t size,
+ enum qcow2_discard_type type);
+void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
+ int nb_clusters, enum qcow2_discard_type type);
+
+int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+ int64_t l1_table_offset, int l1_size, int addend);
+
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
+
+void qcow2_process_discards(BlockDriverState *bs, int ret);
+
+/* qcow2-cluster.c functions */
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+ bool exact_size);
+void qcow2_l2_cache_reset(BlockDriverState *bs);
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key);
+
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset);
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
+uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+ uint64_t offset,
+ int compressed_size);
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+ int nb_sectors);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
+
+/* qcow2-snapshot.c functions */
+int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
+int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
+
+void qcow2_free_snapshots(BlockDriverState *bs);
+int qcow2_read_snapshots(BlockDriverState *bs);
+
+/* qcow2-cache.c functions */
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
+
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+ Qcow2Cache *dependency);
+void qcow2_cache_depends_on_flush(Qcow2Cache *c);
+
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+
+#endif
diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c
new file mode 100644
index 000000000..b473dcd61
--- /dev/null
+++ b/contrib/qemu/block/qed-check.c
@@ -0,0 +1,248 @@
+/*
+ * QEMU Enhanced Disk Format Consistency Check
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+typedef struct {
+ BDRVQEDState *s;
+ BdrvCheckResult *result;
+ bool fix; /* whether to fix invalid offsets */
+
+ uint64_t nclusters;
+ uint32_t *used_clusters; /* referenced cluster bitmap */
+
+ QEDRequest request;
+} QEDCheck;
+
+static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
+ return !!(bitmap[n / 32] & (1 << (n % 32)));
+}
+
+static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
+ bitmap[n / 32] |= 1 << (n % 32);
+}
+
+/**
+ * Set bitmap bits for clusters
+ *
+ * @check: Check structure
+ * @offset: Starting offset in bytes
+ * @n: Number of clusters
+ */
+static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
+ unsigned int n)
+{
+ uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
+ unsigned int corruptions = 0;
+
+ while (n-- != 0) {
+ /* Clusters should only be referenced once */
+ if (qed_test_bit(check->used_clusters, cluster)) {
+ corruptions++;
+ }
+
+ qed_set_bit(check->used_clusters, cluster);
+ cluster++;
+ }
+
+ check->result->corruptions += corruptions;
+ return corruptions == 0;
+}
+
+/**
+ * Check an L2 table
+ *
+ * @ret: Number of invalid cluster offsets
+ */
+static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
+{
+ BDRVQEDState *s = check->s;
+ unsigned int i, num_invalid = 0;
+ uint64_t last_offset = 0;
+
+ for (i = 0; i < s->table_nelems; i++) {
+ uint64_t offset = table->offsets[i];
+
+ if (qed_offset_is_unalloc_cluster(offset) ||
+ qed_offset_is_zero_cluster(offset)) {
+ continue;
+ }
+ check->result->bfi.allocated_clusters++;
+ if (last_offset && (last_offset + s->header.cluster_size != offset)) {
+ check->result->bfi.fragmented_clusters++;
+ }
+ last_offset = offset;
+
+ /* Detect invalid cluster offset */
+ if (!qed_check_cluster_offset(s, offset)) {
+ if (check->fix) {
+ table->offsets[i] = 0;
+ check->result->corruptions_fixed++;
+ } else {
+ check->result->corruptions++;
+ }
+
+ num_invalid++;
+ continue;
+ }
+
+ qed_set_used_clusters(check, offset, 1);
+ }
+
+ return num_invalid;
+}
+
+/**
+ * Descend tables and check each cluster is referenced once only
+ */
+static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
+{
+ BDRVQEDState *s = check->s;
+ unsigned int i, num_invalid_l1 = 0;
+ int ret, last_error = 0;
+
+ /* Mark L1 table clusters used */
+ qed_set_used_clusters(check, s->header.l1_table_offset,
+ s->header.table_size);
+
+ for (i = 0; i < s->table_nelems; i++) {
+ unsigned int num_invalid_l2;
+ uint64_t offset = table->offsets[i];
+
+ if (qed_offset_is_unalloc_cluster(offset)) {
+ continue;
+ }
+
+ /* Detect invalid L2 offset */
+ if (!qed_check_table_offset(s, offset)) {
+ /* Clear invalid offset */
+ if (check->fix) {
+ table->offsets[i] = 0;
+ check->result->corruptions_fixed++;
+ } else {
+ check->result->corruptions++;
+ }
+
+ num_invalid_l1++;
+ continue;
+ }
+
+ if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
+ continue; /* skip an invalid table */
+ }
+
+ ret = qed_read_l2_table_sync(s, &check->request, offset);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ continue;
+ }
+
+ num_invalid_l2 = qed_check_l2_table(check,
+ check->request.l2_table->table);
+
+ /* Write out fixed L2 table */
+ if (num_invalid_l2 > 0 && check->fix) {
+ ret = qed_write_l2_table_sync(s, &check->request, 0,
+ s->table_nelems, false);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ continue;
+ }
+ }
+ }
+
+ /* Drop reference to final table */
+ qed_unref_l2_cache_entry(check->request.l2_table);
+ check->request.l2_table = NULL;
+
+ /* Write out fixed L1 table */
+ if (num_invalid_l1 > 0 && check->fix) {
+ ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ }
+ }
+
+ return last_error;
+}
+
+/**
+ * Check for unreferenced (leaked) clusters
+ */
+static void qed_check_for_leaks(QEDCheck *check)
+{
+ BDRVQEDState *s = check->s;
+ uint64_t i;
+
+ for (i = s->header.header_size; i < check->nclusters; i++) {
+ if (!qed_test_bit(check->used_clusters, i)) {
+ check->result->leaks++;
+ }
+ }
+}
+
+/**
+ * Mark an image clean once it passes check or has been repaired
+ */
+static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
+{
+ /* Skip if there were unfixable corruptions or I/O errors */
+ if (result->corruptions > 0 || result->check_errors > 0) {
+ return;
+ }
+
+ /* Skip if image is already marked clean */
+ if (!(s->header.features & QED_F_NEED_CHECK)) {
+ return;
+ }
+
+ /* Ensure fixes reach storage before clearing check bit */
+ bdrv_flush(s->bs);
+
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header_sync(s);
+}
+
+int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
+{
+ QEDCheck check = {
+ .s = s,
+ .result = result,
+ .nclusters = qed_bytes_to_clusters(s, s->file_size),
+ .request = { .l2_table = NULL },
+ .fix = fix,
+ };
+ int ret;
+
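+ /* One bit per cluster, rounded up to whole 32-bit words. */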
+ check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
+ sizeof(check.used_clusters[0]));
+
+ check.result->bfi.total_clusters =
+ (s->header.image_size + s->header.cluster_size - 1) /
+ s->header.cluster_size;
+ ret = qed_check_l1_table(&check, s->l1_table);
+ if (ret == 0) {
+ /* Only check for leaks if entire image was scanned successfully */
+ qed_check_for_leaks(&check);
+
+ if (fix) {
+ qed_check_mark_clean(s, result);
+ }
+ }
+
+ g_free(check.used_clusters);
+ return ret;
+}
diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c
new file mode 100644
index 000000000..f64b2af8f
--- /dev/null
+++ b/contrib/qemu/block/qed-cluster.c
@@ -0,0 +1,165 @@
+/*
+ * QEMU Enhanced Disk Format Cluster functions
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+/**
+ * Count the number of contiguous data clusters
+ *
+ * @s: QED state
+ * @table: L2 table
+ * @index: First cluster index
+ * @n: Maximum number of clusters
+ * @offset: Set to first cluster offset
+ *
+ * This function scans tables for contiguous clusters. A contiguous run of
+ * clusters may be allocated, unallocated, or zero.
+ */
+static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
+ QEDTable *table,
+ unsigned int index,
+ unsigned int n,
+ uint64_t *offset)
+{
+ unsigned int end = MIN(index + n, s->table_nelems);
+ uint64_t last = table->offsets[index];
+ unsigned int i;
+
+ *offset = last;
+
+ for (i = index + 1; i < end; i++) {
+ if (qed_offset_is_unalloc_cluster(last)) {
+ /* Counting unallocated clusters */
+ if (!qed_offset_is_unalloc_cluster(table->offsets[i])) {
+ break;
+ }
+ } else if (qed_offset_is_zero_cluster(last)) {
+ /* Counting zero clusters */
+ if (!qed_offset_is_zero_cluster(table->offsets[i])) {
+ break;
+ }
+ } else {
+ /* Counting allocated clusters */
+ if (table->offsets[i] != last + s->header.cluster_size) {
+ break;
+ }
+ last = table->offsets[i];
+ }
+ }
+ return i - index;
+}
+
+typedef struct {
+ BDRVQEDState *s;
+ uint64_t pos;
+ size_t len;
+
+ QEDRequest *request;
+
+ /* User callback */
+ QEDFindClusterFunc *cb;
+ void *opaque;
+} QEDFindClusterCB;
+
+static void qed_find_cluster_cb(void *opaque, int ret)
+{
+ QEDFindClusterCB *find_cluster_cb = opaque;
+ BDRVQEDState *s = find_cluster_cb->s;
+ QEDRequest *request = find_cluster_cb->request;
+ uint64_t offset = 0;
+ size_t len = 0;
+ unsigned int index;
+ unsigned int n;
+
+ if (ret) {
+ goto out;
+ }
+
+ index = qed_l2_index(s, find_cluster_cb->pos);
+ n = qed_bytes_to_clusters(s,
+ qed_offset_into_cluster(s, find_cluster_cb->pos) +
+ find_cluster_cb->len);
+ n = qed_count_contiguous_clusters(s, request->l2_table->table,
+ index, n, &offset);
+
+ if (qed_offset_is_unalloc_cluster(offset)) {
+ ret = QED_CLUSTER_L2;
+ } else if (qed_offset_is_zero_cluster(offset)) {
+ ret = QED_CLUSTER_ZERO;
+ } else if (qed_check_cluster_offset(s, offset)) {
+ ret = QED_CLUSTER_FOUND;
+ } else {
+ ret = -EINVAL;
+ }
+
+ len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
+ qed_offset_into_cluster(s, find_cluster_cb->pos));
+
+out:
+ find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
+ g_free(find_cluster_cb);
+}
+
+/**
+ * Find the offset of a data cluster
+ *
+ * @s: QED state
+ * @request: L2 cache entry
+ * @pos: Byte position in device
+ * @len: Number of bytes
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ *
+ * This function translates a position in the block device to an offset in the
+ * image file. It invokes the cb completion callback to report back the
+ * translated offset or unallocated range in the image file.
+ *
+ * If the L2 table exists, request->l2_table points to the L2 table cache entry
+ * and the caller must free the reference when they are finished. The cache
+ * entry is exposed in this way to avoid callers having to read the L2 table
+ * again later during request processing. If request->l2_table is non-NULL it
+ * will be unreferenced before taking on the new cache entry.
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+ size_t len, QEDFindClusterFunc *cb, void *opaque)
+{
+ QEDFindClusterCB *find_cluster_cb;
+ uint64_t l2_offset;
+
+ /* Limit length to L2 boundary. Requests are broken up at the L2 boundary
+ * so that a request acts on one L2 table at a time.
+ */
+ len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
+
+ l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
+ if (qed_offset_is_unalloc_cluster(l2_offset)) {
+ cb(opaque, QED_CLUSTER_L1, 0, len);
+ return;
+ }
+ if (!qed_check_table_offset(s, l2_offset)) {
+ cb(opaque, -EINVAL, 0, 0);
+ return;
+ }
+
+ find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
+ find_cluster_cb->s = s;
+ find_cluster_cb->pos = pos;
+ find_cluster_cb->len = len;
+ find_cluster_cb->cb = cb;
+ find_cluster_cb->opaque = opaque;
+ find_cluster_cb->request = request;
+
+ qed_read_l2_table(s, request, l2_offset,
+ qed_find_cluster_cb, find_cluster_cb);
+}
diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c
new file mode 100644
index 000000000..7d7ac1ffc
--- /dev/null
+++ b/contrib/qemu/block/qed-gencb.c
@@ -0,0 +1,32 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
+{
+ GenericCB *gencb = g_malloc(len);
+ gencb->cb = cb;
+ gencb->opaque = opaque;
+ return gencb;
+}
+
+void gencb_complete(void *opaque, int ret)
+{
+ GenericCB *gencb = opaque;
+ BlockDriverCompletionFunc *cb = gencb->cb;
+ void *user_opaque = gencb->opaque;
+
+ g_free(gencb);
+ cb(user_opaque, ret);
+}
diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c
new file mode 100644
index 000000000..e9b2aae44
--- /dev/null
+++ b/contrib/qemu/block/qed-l2-cache.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU Enhanced Disk Format L2 Cache
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * L2 table cache usage is as follows:
+ *
+ * An open image has one L2 table cache that is used to avoid accessing the
+ * image file for recently referenced L2 tables.
+ *
+ * Cluster offset lookup translates the logical offset within the block device
+ * to a cluster offset within the image file. This is done by indexing into
+ * the L1 and L2 tables which store cluster offsets. It is here where the L2
+ * table cache serves up recently referenced L2 tables.
+ *
+ * If there is a cache miss, that L2 table is read from the image file and
+ * committed to the cache. Subsequent accesses to that L2 table will be served
+ * from the cache until the table is evicted from the cache.
+ *
+ * L2 tables are also committed to the cache when new L2 tables are allocated
+ * in the image file. Since the L2 table cache is write-through, the new L2
+ * table is first written out to the image file and then committed to the
+ * cache.
+ *
+ * Multiple I/O requests may be using an L2 table cache entry at any given
+ * time. That means an entry may be in use across several requests and
+ * reference counting is needed to free the entry at the correct time. In
+ * particular, an entry evicted from the cache will only be freed once all
+ * references are dropped.
+ *
+ * An in-flight I/O request will hold a reference to a L2 table cache entry for
+ * the period during which it needs to access the L2 table. This includes
+ * cluster offset lookup, L2 table allocation, and L2 table update when a new
+ * data cluster has been allocated.
+ *
+ * An interesting case occurs when two requests need to access an L2 table that
+ * is not in the cache. Since the operation to read the table from the image
+ * file takes some time to complete, both requests may see a cache miss and
+ * start reading the L2 table from the image file. The first to finish will
+ * commit its L2 table into the cache. When the second tries to commit,
+ * its table will be deleted in favor of the existing cache entry.
+ */
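+
+/* Illustrative sketch (not part of the original driver): the typical lifecycle
+ * of a cache entry, mirroring what qed_read_l2_table() in qed-table.c does on
+ * a cache miss. Error handling is omitted for brevity.
+ *
+ * CachedL2Table *t = qed_find_l2_cache_entry(&s->l2_cache, offset);
+ * if (!t) {
+ * t = qed_alloc_l2_cache_entry(&s->l2_cache);
+ * t->table = qed_alloc_table(s);
+ * // ... read the table from the image file, then set t->offset ...
+ * qed_commit_l2_cache_entry(&s->l2_cache, t); // cache steals the reference
+ * t = qed_find_l2_cache_entry(&s->l2_cache, offset); // re-acquire one
+ * }
+ * // ... use t->table ...
+ * qed_unref_l2_cache_entry(t);
+ */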
+
+#include "trace.h"
+#include "qed.h"
+
+/* Each L2 holds 2GB so this lets us fully cache a 100GB disk */
+#define MAX_L2_CACHE_SIZE 50
+
+/**
+ * Initialize the L2 cache
+ */
+void qed_init_l2_cache(L2TableCache *l2_cache)
+{
+ QTAILQ_INIT(&l2_cache->entries);
+ l2_cache->n_entries = 0;
+}
+
+/**
+ * Free the L2 cache
+ */
+void qed_free_l2_cache(L2TableCache *l2_cache)
+{
+ CachedL2Table *entry, *next_entry;
+
+ QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) {
+ qemu_vfree(entry->table);
+ g_free(entry);
+ }
+}
+
+/**
+ * Allocate an uninitialized entry from the cache
+ *
+ * The returned entry has a reference count of 1 and is owned by the caller.
+ * The caller must allocate the actual table field for this entry and it must
+ * be freeable using qemu_vfree().
+ */
+CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
+{
+ CachedL2Table *entry;
+
+ entry = g_malloc0(sizeof(*entry));
+ entry->ref++;
+
+ trace_qed_alloc_l2_cache_entry(l2_cache, entry);
+
+ return entry;
+}
+
+/**
+ * Decrease an entry's reference count and free it once the reference count
+ * drops to zero.
+ */
+void qed_unref_l2_cache_entry(CachedL2Table *entry)
+{
+ if (!entry) {
+ return;
+ }
+
+ entry->ref--;
+ trace_qed_unref_l2_cache_entry(entry, entry->ref);
+ if (entry->ref == 0) {
+ qemu_vfree(entry->table);
+ g_free(entry);
+ }
+}
+
+/**
+ * Find an entry in the L2 cache. This may return NULL and it's up to the
+ * caller to satisfy the cache miss.
+ *
+ * For a cached entry, this function increases the reference count and returns
+ * the entry.
+ */
+CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
+{
+ CachedL2Table *entry;
+
+ QTAILQ_FOREACH(entry, &l2_cache->entries, node) {
+ if (entry->offset == offset) {
+ trace_qed_find_l2_cache_entry(l2_cache, entry, offset, entry->ref);
+ entry->ref++;
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Commit an L2 cache entry into the cache. This is meant to be used as part of
+ * the process to satisfy a cache miss. A caller allocates an entry that is
+ * not yet in the L2 cache; once the entry is valid and present on disk, it
+ * can be committed into the cache.
+ *
+ * Since the cache is write-through, it's important that this function is not
+ * called until the entry is present on disk and the L1 has been updated to
+ * point to the entry.
+ *
+ * N.B. This function steals a reference to the l2_table from the caller so the
+ * caller must obtain a new reference by issuing a call to
+ * qed_find_l2_cache_entry().
+ */
+void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
+{
+ CachedL2Table *entry;
+
+ entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
+ if (entry) {
+ qed_unref_l2_cache_entry(entry);
+ qed_unref_l2_cache_entry(l2_table);
+ return;
+ }
+
+ /* Evict an unused cache entry so we have space. If all entries are in use
+ * we can grow the cache temporarily and we try to shrink back down later.
+ */
+ if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
+ CachedL2Table *next;
+ QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
+ if (entry->ref > 1) {
+ continue;
+ }
+
+ QTAILQ_REMOVE(&l2_cache->entries, entry, node);
+ l2_cache->n_entries--;
+ qed_unref_l2_cache_entry(entry);
+
+ /* Stop evicting when we've shrunk back to max size */
+ if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
+ break;
+ }
+ }
+ }
+
+ l2_cache->n_entries++;
+ QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
+}
diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c
new file mode 100644
index 000000000..76d2dcccf
--- /dev/null
+++ b/contrib/qemu/block/qed-table.c
@@ -0,0 +1,296 @@
+/*
+ * QEMU Enhanced Disk Format Table I/O
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
+#include "qed.h"
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ QEDTable *table;
+
+ struct iovec iov;
+ QEMUIOVector qiov;
+} QEDReadTableCB;
+
+static void qed_read_table_cb(void *opaque, int ret)
+{
+ QEDReadTableCB *read_table_cb = opaque;
+ QEDTable *table = read_table_cb->table;
+ int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
+ int i;
+
+ /* Handle I/O error */
+ if (ret) {
+ goto out;
+ }
+
+ /* Byteswap offsets */
+ for (i = 0; i < noffsets; i++) {
+ table->offsets[i] = le64_to_cpu(table->offsets[i]);
+ }
+
+out:
+ /* Completion */
+ trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
+ gencb_complete(&read_table_cb->gencb, ret);
+}
+
+static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
+ cb, opaque);
+ QEMUIOVector *qiov = &read_table_cb->qiov;
+
+ trace_qed_read_table(s, offset, table);
+
+ read_table_cb->s = s;
+ read_table_cb->table = table;
+ read_table_cb->iov.iov_base = table->offsets,
+ read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
+
+ qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
+ bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+ qiov->size / BDRV_SECTOR_SIZE,
+ qed_read_table_cb, read_table_cb);
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ QEDTable *orig_table;
+ QEDTable *table;
+ bool flush; /* flush after write? */
+
+ struct iovec iov;
+ QEMUIOVector qiov;
+} QEDWriteTableCB;
+
+static void qed_write_table_cb(void *opaque, int ret)
+{
+ QEDWriteTableCB *write_table_cb = opaque;
+
+ trace_qed_write_table_cb(write_table_cb->s,
+ write_table_cb->orig_table,
+ write_table_cb->flush,
+ ret);
+
+ if (ret) {
+ goto out;
+ }
+
+ if (write_table_cb->flush) {
+ /* We still need to flush first */
+ write_table_cb->flush = false;
+ bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
+ write_table_cb);
+ return;
+ }
+
+out:
+ qemu_vfree(write_table_cb->table);
+ gencb_complete(&write_table_cb->gencb, ret);
+}
+
+/**
+ * Write out an updated part or all of a table
+ *
+ * @s: QED state
+ * @offset: Offset of table in image file, in bytes
+ * @table: Table
+ * @index: Index of first element
+ * @n: Number of elements
+ * @flush: Whether or not to sync to disk
+ * @cb: Completion function
+ * @opaque: Argument for completion function
+ */
+static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ QEDWriteTableCB *write_table_cb;
+ unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
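+ /* A 512-byte sector holds 64 8-byte offsets; writes must cover whole
+ * sectors, so the touched range is widened to sector boundaries below. */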
+ unsigned int start, end, i;
+ size_t len_bytes;
+
+ trace_qed_write_table(s, offset, table, index, n);
+
+ /* Calculate indices of the first and one after last elements */
+ start = index & ~sector_mask;
+ end = (index + n + sector_mask) & ~sector_mask;
+
+ len_bytes = (end - start) * sizeof(uint64_t);
+
+ write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
+ write_table_cb->s = s;
+ write_table_cb->orig_table = table;
+ write_table_cb->flush = flush;
+ write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
+ write_table_cb->iov.iov_base = write_table_cb->table->offsets;
+ write_table_cb->iov.iov_len = len_bytes;
+ qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
+
+ /* Byteswap table */
+ for (i = start; i < end; i++) {
+ uint64_t le_offset = cpu_to_le64(table->offsets[i]);
+ write_table_cb->table->offsets[i - start] = le_offset;
+ }
+
+ /* Adjust for offset into table */
+ offset += start * sizeof(uint64_t);
+
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ &write_table_cb->qiov,
+ write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
+ qed_write_table_cb, write_table_cb);
+}
+
+/**
+ * Propagate return value from async callback
+ */
+static void qed_sync_cb(void *opaque, int ret)
+{
+ *(int *)opaque = ret;
+}
+
+int qed_read_l1_table_sync(BDRVQEDState *s)
+{
+ int ret = -EINPROGRESS;
+
+ qed_read_table(s, s->header.l1_table_offset,
+ s->l1_table, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
+
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
+ qed_write_table(s, s->header.l1_table_offset,
+ s->l1_table, index, n, false, cb, opaque);
+}
+
+int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
+ unsigned int n)
+{
+ int ret = -EINPROGRESS;
+
+ qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ uint64_t l2_offset;
+ QEDRequest *request;
+} QEDReadL2TableCB;
+
+static void qed_read_l2_table_cb(void *opaque, int ret)
+{
+ QEDReadL2TableCB *read_l2_table_cb = opaque;
+ QEDRequest *request = read_l2_table_cb->request;
+ BDRVQEDState *s = read_l2_table_cb->s;
+ CachedL2Table *l2_table = request->l2_table;
+ uint64_t l2_offset = read_l2_table_cb->l2_offset;
+
+ if (ret) {
+ /* can't trust loaded L2 table anymore */
+ qed_unref_l2_cache_entry(l2_table);
+ request->l2_table = NULL;
+ } else {
+ l2_table->offset = l2_offset;
+
+ qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+ /* This is guaranteed to succeed because we just committed the entry
+ * to the cache.
+ */
+ request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+ assert(request->l2_table != NULL);
+ }
+
+ gencb_complete(&read_l2_table_cb->gencb, ret);
+}
+
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ QEDReadL2TableCB *read_l2_table_cb;
+
+ qed_unref_l2_cache_entry(request->l2_table);
+
+ /* Check for cached L2 entry */
+ request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
+ if (request->l2_table) {
+ cb(opaque, 0);
+ return;
+ }
+
+ request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
+ request->l2_table->table = qed_alloc_table(s);
+
+ read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
+ read_l2_table_cb->s = s;
+ read_l2_table_cb->l2_offset = offset;
+ read_l2_table_cb->request = request;
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
+ qed_read_table(s, offset, request->l2_table->table,
+ qed_read_l2_table_cb, read_l2_table_cb);
+}
+
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
+{
+ int ret = -EINPROGRESS;
+
+ qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
+
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
+ qed_write_table(s, request->l2_table->offset,
+ request->l2_table->table, index, n, flush, cb, opaque);
+}
+
+int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush)
+{
+ int ret = -EINPROGRESS;
+
+ qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c
new file mode 100644
index 000000000..f767b0528
--- /dev/null
+++ b/contrib/qemu/block/qed.c
@@ -0,0 +1,1596 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/timer.h"
+#include "trace.h"
+#include "qed.h"
+#include "qapi/qmp/qerror.h"
+#include "migration/migration.h"
+
+static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+ bool finished = false;
+
+ /* Wait for the request to finish */
+ acb->finished = &finished;
+ while (!finished) {
+ qemu_aio_wait();
+ }
+}
+
+static const AIOCBInfo qed_aiocb_info = {
+ .aiocb_size = sizeof(QEDAIOCB),
+ .cancel = qed_aio_cancel,
+};
+
+static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
+ const char *filename)
+{
+ const QEDHeader *header = (const QEDHeader *)buf;
+
+ if (buf_size < sizeof(*header)) {
+ return 0;
+ }
+ if (le32_to_cpu(header->magic) != QED_MAGIC) {
+ return 0;
+ }
+ return 100;
+}
+
+/**
+ * Check whether an image format is raw
+ *
+ * @fmt: Backing file format, may be NULL
+ */
+static bool qed_fmt_is_raw(const char *fmt)
+{
+ return fmt && strcmp(fmt, "raw") == 0;
+}
+
+static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
+{
+ cpu->magic = le32_to_cpu(le->magic);
+ cpu->cluster_size = le32_to_cpu(le->cluster_size);
+ cpu->table_size = le32_to_cpu(le->table_size);
+ cpu->header_size = le32_to_cpu(le->header_size);
+ cpu->features = le64_to_cpu(le->features);
+ cpu->compat_features = le64_to_cpu(le->compat_features);
+ cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
+ cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
+ cpu->image_size = le64_to_cpu(le->image_size);
+ cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
+ cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
+}
+
+static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
+{
+ le->magic = cpu_to_le32(cpu->magic);
+ le->cluster_size = cpu_to_le32(cpu->cluster_size);
+ le->table_size = cpu_to_le32(cpu->table_size);
+ le->header_size = cpu_to_le32(cpu->header_size);
+ le->features = cpu_to_le64(cpu->features);
+ le->compat_features = cpu_to_le64(cpu->compat_features);
+ le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
+ le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
+ le->image_size = cpu_to_le64(cpu->image_size);
+ le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
+ le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
+}
+
+int qed_write_header_sync(BDRVQEDState *s)
+{
+ QEDHeader le;
+ int ret;
+
+ qed_header_cpu_to_le(&s->header, &le);
+ ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+ if (ret != sizeof(le)) {
+ return ret;
+ }
+ return 0;
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ struct iovec iov;
+ QEMUIOVector qiov;
+ int nsectors;
+ uint8_t *buf;
+} QEDWriteHeaderCB;
+
+static void qed_write_header_cb(void *opaque, int ret)
+{
+ QEDWriteHeaderCB *write_header_cb = opaque;
+
+ qemu_vfree(write_header_cb->buf);
+ gencb_complete(write_header_cb, ret);
+}
+
+static void qed_write_header_read_cb(void *opaque, int ret)
+{
+ QEDWriteHeaderCB *write_header_cb = opaque;
+ BDRVQEDState *s = write_header_cb->s;
+
+ if (ret) {
+ qed_write_header_cb(write_header_cb, ret);
+ return;
+ }
+
+ /* Update header */
+ qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
+
+ bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+ write_header_cb->nsectors, qed_write_header_cb,
+ write_header_cb);
+}
+
+/**
+ * Update header in-place (does not rewrite backing filename or other strings)
+ *
+ * This function only updates known header fields in-place and does not affect
+ * extra data after the QED header.
+ */
+static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
+ void *opaque)
+{
+ /* We must write full sectors for O_DIRECT but cannot necessarily generate
+ * the data following the header if an unrecognized compat feature is
+ * active. Therefore, first read the sectors containing the header, update
+ * them, and write back.
+ */
+
+ int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
+ BDRV_SECTOR_SIZE;
+ size_t len = nsectors * BDRV_SECTOR_SIZE;
+ QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
+ cb, opaque);
+
+ write_header_cb->s = s;
+ write_header_cb->nsectors = nsectors;
+ write_header_cb->buf = qemu_blockalign(s->bs, len);
+ write_header_cb->iov.iov_base = write_header_cb->buf;
+ write_header_cb->iov.iov_len = len;
+ qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
+
+ bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+ qed_write_header_read_cb, write_header_cb);
+}
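+
+/* Editor's note (not part of the original QEMU code): a worked example of the
+ * sector round-up above. The QED header typically packs to 64 bytes (six
+ * 32-bit plus five 64-bit fields), so with a 512-byte BDRV_SECTOR_SIZE:
+ *
+ * nsectors = (64 + 512 - 1) / 512 = 1
+ *
+ * i.e. the read-modify-write cycle touches a single sector, which is read
+ * back in full before the header fields are rewritten.
+ */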
+
+static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
+{
+ uint64_t table_entries;
+ uint64_t l2_size;
+
+ table_entries = (table_size * cluster_size) / sizeof(uint64_t);
+ l2_size = table_entries * cluster_size;
+
+ return l2_size * table_entries;
+}
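+
+/* Editor's note (not part of the original QEMU code): with the default
+ * geometry (cluster_size = 64 KB, table_size = 4) the formula above gives
+ *
+ * table_entries = (4 * 65536) / 8 = 32768
+ * l2_size = 32768 * 65536 = 2 GB addressed per L2 table
+ * max image size = 2 GB * 32768 L1 entries = 64 TB
+ */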
+
+static bool qed_is_cluster_size_valid(uint32_t cluster_size)
+{
+ if (cluster_size < QED_MIN_CLUSTER_SIZE ||
+ cluster_size > QED_MAX_CLUSTER_SIZE) {
+ return false;
+ }
+ if (cluster_size & (cluster_size - 1)) {
+ return false; /* not power of 2 */
+ }
+ return true;
+}
+
+static bool qed_is_table_size_valid(uint32_t table_size)
+{
+ if (table_size < QED_MIN_TABLE_SIZE ||
+ table_size > QED_MAX_TABLE_SIZE) {
+ return false;
+ }
+ if (table_size & (table_size - 1)) {
+ return false; /* not power of 2 */
+ }
+ return true;
+}
+
+static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
+ uint32_t table_size)
+{
+ if (image_size % BDRV_SECTOR_SIZE != 0) {
+ return false; /* not multiple of sector size */
+ }
+ if (image_size > qed_max_image_size(cluster_size, table_size)) {
+ return false; /* image is too large */
+ }
+ return true;
+}
+
+/**
+ * Read a string of known length from the image file
+ *
+ * @file: Image file
+ * @offset: File offset to start of string, in bytes
+ * @n: String length in bytes
+ * @buf: Destination buffer
+ * @buflen: Destination buffer length in bytes
+ * @ret: 0 on success, -errno on failure
+ *
+ * The string is NUL-terminated.
+ */
+static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
+ char *buf, size_t buflen)
+{
+ int ret;
+ if (n >= buflen) {
+ return -EINVAL;
+ }
+ ret = bdrv_pread(file, offset, buf, n);
+ if (ret < 0) {
+ return ret;
+ }
+ buf[n] = '\0';
+ return 0;
+}
+
+/**
+ * Allocate new clusters
+ *
+ * @s: QED state
+ * @n: Number of contiguous clusters to allocate
+ * @ret: Offset of first allocated cluster
+ *
+ * This function only produces the offset where the new clusters should be
+ * written. It updates BDRVQEDState but does not make any changes to the image
+ * file.
+ */
+static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
+{
+ uint64_t offset = s->file_size;
+ s->file_size += n * s->header.cluster_size;
+ return offset;
+}
+
+QEDTable *qed_alloc_table(BDRVQEDState *s)
+{
+ /* Honor O_DIRECT memory alignment requirements */
+ return qemu_blockalign(s->bs,
+ s->header.cluster_size * s->header.table_size);
+}
+
+/**
+ * Allocate a new zeroed L2 table
+ */
+static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
+{
+ CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
+
+ l2_table->table = qed_alloc_table(s);
+ l2_table->offset = qed_alloc_clusters(s, s->header.table_size);
+
+ memset(l2_table->table->offsets, 0,
+ s->header.cluster_size * s->header.table_size);
+ return l2_table;
+}
+
+static void qed_aio_next_io(void *opaque, int ret);
+
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+ assert(!s->allocating_write_reqs_plugged);
+
+ s->allocating_write_reqs_plugged = true;
+}
+
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+ QEDAIOCB *acb;
+
+ assert(s->allocating_write_reqs_plugged);
+
+ s->allocating_write_reqs_plugged = false;
+
+ acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+ if (acb) {
+ qed_aio_next_io(acb, 0);
+ }
+}
+
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+ /* Do nothing */
+}
+
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+ BDRVQEDState *s = opaque;
+
+ bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+ /* No need to wait until flush completes */
+ qed_unplug_allocating_write_reqs(s);
+}
+
+static void qed_clear_need_check(void *opaque, int ret)
+{
+ BDRVQEDState *s = opaque;
+
+ if (ret) {
+ qed_unplug_allocating_write_reqs(s);
+ return;
+ }
+
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header(s, qed_flush_after_clear_need_check, s);
+}
+
+static void qed_need_check_timer_cb(void *opaque)
+{
+ BDRVQEDState *s = opaque;
+
+ /* The timer should only fire when allocating writes have drained */
+ assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+ trace_qed_need_check_timer_cb(s);
+
+ qed_plug_allocating_write_reqs(s);
+
+ /* Ensure writes are on disk before clearing flag */
+ bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+ trace_qed_start_need_check_timer(s);
+
+ /* Use vm_clock so we don't alter the image file while suspended for
+ * migration.
+ */
+ qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+ get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+}
+
+/* It's okay to call this multiple times or when no timer is started */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+ trace_qed_cancel_need_check_timer(s);
+ qemu_del_timer(s->need_check_timer);
+}
+
+static void bdrv_qed_rebind(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+ s->bs = bs;
+}
+
+static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
+{
+ BDRVQEDState *s = bs->opaque;
+ QEDHeader le_header;
+ int64_t file_size;
+ int ret;
+
+ s->bs = bs;
+ QSIMPLEQ_INIT(&s->allocating_write_reqs);
+
+ ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+ if (ret < 0) {
+ return ret;
+ }
+ qed_header_le_to_cpu(&le_header, &s->header);
+
+ if (s->header.magic != QED_MAGIC) {
+ return -EMEDIUMTYPE;
+ }
+ if (s->header.features & ~QED_FEATURE_MASK) {
+ /* image uses unsupported feature bits */
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%" PRIx64,
+ s->header.features & ~QED_FEATURE_MASK);
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "QED", buf);
+ return -ENOTSUP;
+ }
+ if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
+ return -EINVAL;
+ }
+
+ /* Round down file size to the last cluster */
+ file_size = bdrv_getlength(bs->file);
+ if (file_size < 0) {
+ return file_size;
+ }
+ s->file_size = qed_start_of_cluster(s, file_size);
+
+ if (!qed_is_table_size_valid(s->header.table_size)) {
+ return -EINVAL;
+ }
+ if (!qed_is_image_size_valid(s->header.image_size,
+ s->header.cluster_size,
+ s->header.table_size)) {
+ return -EINVAL;
+ }
+ if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
+ return -EINVAL;
+ }
+
+ s->table_nelems = (s->header.cluster_size * s->header.table_size) /
+ sizeof(uint64_t);
+ s->l2_shift = ffs(s->header.cluster_size) - 1;
+ s->l2_mask = s->table_nelems - 1;
+ s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+
+ if ((s->header.features & QED_F_BACKING_FILE)) {
+ if ((uint64_t)s->header.backing_filename_offset +
+ s->header.backing_filename_size >
+ s->header.cluster_size * s->header.header_size) {
+ return -EINVAL;
+ }
+
+ ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+ s->header.backing_filename_size, bs->backing_file,
+ sizeof(bs->backing_file));
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
+ }
+ }
+
+ /* Reset unknown autoclear feature bits. This is a backwards
+ * compatibility mechanism that allows images to be opened by older
+ * programs, which "knock out" unknown feature bits. When an image is
+ * opened by a newer program again it can detect that the autoclear
+ * feature is no longer valid.
+ */
+ if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
+ !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+ s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
+
+ ret = qed_write_header_sync(s);
+ if (ret) {
+ return ret;
+ }
+
+ /* From here on only known autoclear feature bits are valid */
+ bdrv_flush(bs->file);
+ }
+
+ s->l1_table = qed_alloc_table(s);
+ qed_init_l2_cache(&s->l2_cache);
+
+ ret = qed_read_l1_table_sync(s);
+ if (ret) {
+ goto out;
+ }
+
+ /* If image was not closed cleanly, check consistency */
+ if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
+ /* Read-only images cannot be fixed. There is no risk of corruption
+ * since write operations are not possible. Therefore, allow
+ * potentially inconsistent images to be opened read-only. This can
+ * aid data recovery from an otherwise inconsistent image.
+ */
+ if (!bdrv_is_read_only(bs->file) &&
+ !(flags & BDRV_O_INCOMING)) {
+ BdrvCheckResult result = {0};
+
+ ret = qed_check(s, &result, true);
+ if (ret) {
+ goto out;
+ }
+ }
+ }
+
+ s->need_check_timer = qemu_new_timer_ns(vm_clock,
+ qed_need_check_timer_cb, s);
+
+out:
+ if (ret) {
+ qed_free_l2_cache(&s->l2_cache);
+ qemu_vfree(s->l1_table);
+ }
+ return ret;
+}
+
+/* We have nothing to do for QED reopen; the stub just returns
+ * success */
+static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static void bdrv_qed_close(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ qed_cancel_need_check_timer(s);
+ qemu_free_timer(s->need_check_timer);
+
+ /* Ensure writes reach stable storage */
+ bdrv_flush(bs->file);
+
+ /* Clean shutdown, no check required on next open */
+ if (s->header.features & QED_F_NEED_CHECK) {
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header_sync(s);
+ }
+
+ qed_free_l2_cache(&s->l2_cache);
+ qemu_vfree(s->l1_table);
+}
+
+static int qed_create(const char *filename, uint32_t cluster_size,
+ uint64_t image_size, uint32_t table_size,
+ const char *backing_file, const char *backing_fmt)
+{
+ QEDHeader header = {
+ .magic = QED_MAGIC,
+ .cluster_size = cluster_size,
+ .table_size = table_size,
+ .header_size = 1,
+ .features = 0,
+ .compat_features = 0,
+ .l1_table_offset = cluster_size,
+ .image_size = image_size,
+ };
+ QEDHeader le_header;
+ uint8_t *l1_table = NULL;
+ size_t l1_size = header.cluster_size * header.table_size;
+ int ret = 0;
+ BlockDriverState *bs = NULL;
+
+ ret = bdrv_create_file(filename, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* File must start empty and grow; check that truncate is supported */
+ ret = bdrv_truncate(bs, 0);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (backing_file) {
+ header.features |= QED_F_BACKING_FILE;
+ header.backing_filename_offset = sizeof(le_header);
+ header.backing_filename_size = strlen(backing_file);
+
+ if (qed_fmt_is_raw(backing_fmt)) {
+ header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
+ }
+ }
+
+ qed_header_cpu_to_le(&header, &le_header);
+ ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+ if (ret < 0) {
+ goto out;
+ }
+ ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
+ header.backing_filename_size);
+ if (ret < 0) {
+ goto out;
+ }
+
+ l1_table = g_malloc0(l1_size);
+ ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0; /* success */
+out:
+ g_free(l1_table);
+ bdrv_delete(bs);
+ return ret;
+}
+
+static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
+{
+ uint64_t image_size = 0;
+ uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
+ uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
+ const char *backing_file = NULL;
+ const char *backing_fmt = NULL;
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ image_size = options->value.n;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+ backing_fmt = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+ if (options->value.n) {
+ cluster_size = options->value.n;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
+ if (options->value.n) {
+ table_size = options->value.n;
+ }
+ }
+ options++;
+ }
+
+ if (!qed_is_cluster_size_valid(cluster_size)) {
+ fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
+ QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
+ return -EINVAL;
+ }
+ if (!qed_is_table_size_valid(table_size)) {
+ fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
+ QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
+ return -EINVAL;
+ }
+ if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
+ fprintf(stderr, "QED image size must be a non-zero multiple of "
+ "cluster size and less than %" PRIu64 " bytes\n",
+ qed_max_image_size(cluster_size, table_size));
+ return -EINVAL;
+ }
+
+ return qed_create(filename, cluster_size, image_size, table_size,
+ backing_file, backing_fmt);
+}
+
+typedef struct {
+ Coroutine *co;
+ int is_allocated;
+ int *pnum;
+} QEDIsAllocatedCB;
+
+static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
+{
+ QEDIsAllocatedCB *cb = opaque;
+ *cb->pnum = len / BDRV_SECTOR_SIZE;
+ cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+ if (cb->co) {
+ qemu_coroutine_enter(cb->co, NULL);
+ }
+}
+
+static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVQEDState *s = bs->opaque;
+ uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+ size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
+ QEDIsAllocatedCB cb = {
+ .is_allocated = -1,
+ .pnum = pnum,
+ };
+ QEDRequest request = { .l2_table = NULL };
+
+ qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
+
+ /* Now sleep if the callback wasn't invoked immediately */
+ while (cb.is_allocated == -1) {
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+
+ qed_unref_l2_cache_entry(request.l2_table);
+
+ return cb.is_allocated;
+}
+
+static int bdrv_qed_make_empty(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+
+static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
+{
+ return acb->common.bs->opaque;
+}
+
+/**
+ * Read from the backing file or zero-fill if no backing file
+ *
+ * @s: QED state
+ * @pos: Byte position in device
+ * @qiov: Destination I/O vector
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ *
+ * This function reads qiov->size bytes starting at pos from the backing file.
+ * If there is no backing file then zeroes are read.
+ */
+static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
+ QEMUIOVector *qiov,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ uint64_t backing_length = 0;
+ size_t size;
+
+ /* If there is a backing file, get its length. Treat the absence of a
+ * backing file like a zero length backing file.
+ */
+ if (s->bs->backing_hd) {
+ int64_t l = bdrv_getlength(s->bs->backing_hd);
+ if (l < 0) {
+ cb(opaque, l);
+ return;
+ }
+ backing_length = l;
+ }
+
+ /* Zero all sectors if reading beyond the end of the backing file */
+ if (pos >= backing_length ||
+ pos + qiov->size > backing_length) {
+ qemu_iovec_memset(qiov, 0, 0, qiov->size);
+ }
+
+ /* Complete now if there are no backing file sectors to read */
+ if (pos >= backing_length) {
+ cb(opaque, 0);
+ return;
+ }
+
+ /* If the read straddles the end of the backing file, shorten it */
+ size = MIN((uint64_t)backing_length - pos, qiov->size);
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
+ bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+ qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+}
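+
+/* Editor's note (not part of the original QEMU code): a worked example of the
+ * straddling case above. With pos = 1024 KB, qiov->size = 128 KB and a
+ * 1088 KB backing file, the whole qiov is zeroed first, size is clamped to
+ * MIN(1088 KB - 1024 KB, 128 KB) = 64 KB, and only those 64 KB are read from
+ * the backing file; the rest of the buffer stays zeroed.
+ */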
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ uint64_t offset;
+} CopyFromBackingFileCB;
+
+static void qed_copy_from_backing_file_cb(void *opaque, int ret)
+{
+ CopyFromBackingFileCB *copy_cb = opaque;
+ qemu_vfree(copy_cb->iov.iov_base);
+ gencb_complete(&copy_cb->gencb, ret);
+}
+
+static void qed_copy_from_backing_file_write(void *opaque, int ret)
+{
+ CopyFromBackingFileCB *copy_cb = opaque;
+ BDRVQEDState *s = copy_cb->s;
+
+ if (ret) {
+ qed_copy_from_backing_file_cb(copy_cb, ret);
+ return;
+ }
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
+ bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+ &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
+ qed_copy_from_backing_file_cb, copy_cb);
+}
+
+/**
+ * Copy data from backing file into the image
+ *
+ * @s: QED state
+ * @pos: Byte position in device
+ * @len: Number of bytes
+ * @offset: Byte offset in image file
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ */
+static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
+ uint64_t len, uint64_t offset,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ CopyFromBackingFileCB *copy_cb;
+
+ /* Skip copy entirely if there is no work to do */
+ if (len == 0) {
+ cb(opaque, 0);
+ return;
+ }
+
+ copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
+ copy_cb->s = s;
+ copy_cb->offset = offset;
+ copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
+ copy_cb->iov.iov_len = len;
+ qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
+
+ qed_read_backing_file(s, pos, &copy_cb->qiov,
+ qed_copy_from_backing_file_write, copy_cb);
+}
+
+/**
+ * Link one or more contiguous clusters into a table
+ *
+ * @s: QED state
+ * @table: L2 table
+ * @index: First cluster index
+ * @n: Number of contiguous clusters
+ * @cluster: First cluster offset
+ *
+ * The cluster offset may be an allocated byte offset in the image file, the
+ * zero cluster marker, or the unallocated cluster marker.
+ */
+static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
+ unsigned int n, uint64_t cluster)
+{
+ int i;
+ for (i = index; i < index + n; i++) {
+ table->offsets[i] = cluster;
+ if (!qed_offset_is_unalloc_cluster(cluster) &&
+ !qed_offset_is_zero_cluster(cluster)) {
+ cluster += s->header.cluster_size;
+ }
+ }
+}
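+
+/* Editor's note (not part of the original QEMU code): linking n = 3 clusters
+ * at cluster = 0x100000 with 64 KB clusters stores 0x100000, 0x110000 and
+ * 0x120000 in consecutive entries, while the special markers (0 for
+ * unallocated, 1 for zero clusters) are stored unchanged in every entry
+ * because they are not real file offsets.
+ */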
+
+static void qed_aio_complete_bh(void *opaque)
+{
+ QEDAIOCB *acb = opaque;
+ BlockDriverCompletionFunc *cb = acb->common.cb;
+ void *user_opaque = acb->common.opaque;
+ int ret = acb->bh_ret;
+ bool *finished = acb->finished;
+
+ qemu_bh_delete(acb->bh);
+ qemu_aio_release(acb);
+
+ /* Invoke callback */
+ cb(user_opaque, ret);
+
+ /* Signal cancel completion */
+ if (finished) {
+ *finished = true;
+ }
+}
+
+static void qed_aio_complete(QEDAIOCB *acb, int ret)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+
+ trace_qed_aio_complete(s, acb, ret);
+
+ /* Free resources */
+ qemu_iovec_destroy(&acb->cur_qiov);
+ qed_unref_l2_cache_entry(acb->request.l2_table);
+
+ /* Free the buffer we may have allocated for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ qemu_vfree(acb->qiov->iov[0].iov_base);
+ acb->qiov->iov[0].iov_base = NULL;
+ }
+
+ /* Arrange for a bh to invoke the completion function */
+ acb->bh_ret = ret;
+ acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+
+ /* Start next allocating write request waiting behind this one. Note that
+ * requests enqueue themselves when they first hit an unallocated cluster
+ * but they wait until the entire request is finished before waking up the
+ * next request in the queue. This ensures that we don't cycle through
+ * requests multiple times but rather finish one at a time completely.
+ */
+ if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
+ acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+ if (acb) {
+ qed_aio_next_io(acb, 0);
+ } else if (s->header.features & QED_F_NEED_CHECK) {
+ qed_start_need_check_timer(s);
+ }
+ }
+}
+
+/**
+ * Commit the current L2 table to the cache
+ */
+static void qed_commit_l2_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ CachedL2Table *l2_table = acb->request.l2_table;
+ uint64_t l2_offset = l2_table->offset;
+
+ qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+ /* This is guaranteed to succeed because we just committed the entry to the
+ * cache.
+ */
+ acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+ assert(acb->request.l2_table != NULL);
+
+ qed_aio_next_io(opaque, ret);
+}
+
+/**
+ * Update L1 table with new L2 table offset and write it out
+ */
+static void qed_aio_write_l1_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ int index;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ index = qed_l1_index(s, acb->cur_pos);
+ s->l1_table->offsets[index] = acb->request.l2_table->offset;
+
+ qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
+}
+
+/**
+ * Update L2 table with new cluster offsets and write them out
+ */
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+ bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
+ int index;
+
+ if (ret) {
+ goto err;
+ }
+
+ if (need_alloc) {
+ qed_unref_l2_cache_entry(acb->request.l2_table);
+ acb->request.l2_table = qed_new_l2_table(s);
+ }
+
+ index = qed_l2_index(s, acb->cur_pos);
+ qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
+ offset);
+
+ if (need_alloc) {
+ /* Write out the whole new L2 table */
+ qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
+ qed_aio_write_l1_update, acb);
+ } else {
+ /* Write out only the updated part of the L2 table */
+ qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
+ qed_aio_next_io, acb);
+ }
+ return;
+
+err:
+ qed_aio_complete(acb, ret);
+}
+
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
+/**
+ * Flush new data clusters before updating the L2 table
+ *
+ * This flush is necessary when a backing file is in use. A crash during an
+ * allocating write could result in empty clusters in the image. If the write
+ * only touched a subregion of the cluster, then backing image sectors have
+ * been lost in the untouched region. The solution is to flush after writing a
+ * new data cluster and before updating the L2 table.
+ */
+static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+
+ if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+ qed_aio_complete(acb, -EIO);
+ }
+}
+
+/**
+ * Write data to the image file
+ */
+static void qed_aio_write_main(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos);
+ BlockDriverCompletionFunc *next_fn;
+
+ trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
+ next_fn = qed_aio_next_io;
+ } else {
+ if (s->bs->backing_hd) {
+ next_fn = qed_aio_write_flush_before_l2_update;
+ } else {
+ next_fn = qed_aio_write_l2_update_cb;
+ }
+ }
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ next_fn, acb);
+}
+
+/**
+ * Populate the untouched region after the write in a new data cluster
+ */
+static void qed_aio_write_postfill(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t start = acb->cur_pos + acb->cur_qiov.size;
+ uint64_t len =
+ qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
+ uint64_t offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos) +
+ acb->cur_qiov.size;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ trace_qed_aio_write_postfill(s, acb, start, len, offset);
+ qed_copy_from_backing_file(s, start, len, offset,
+ qed_aio_write_main, acb);
+}
+
+/**
+ * Populate the untouched region before the write in a new data cluster
+ */
+static void qed_aio_write_prefill(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
+ uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
+
+ trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
+ qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
+ qed_aio_write_postfill, acb);
+}
+
+/**
+ * Check if the QED_F_NEED_CHECK bit should be set during allocating write
+ */
+static bool qed_should_set_need_check(BDRVQEDState *s)
+{
+ /* The flush before L2 update path ensures consistency */
+ if (s->bs->backing_hd) {
+ return false;
+ }
+
+ return !(s->header.features & QED_F_NEED_CHECK);
+}
+
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ qed_aio_write_l2_update(acb, 0, 1);
+}
+
+/**
+ * Write new data cluster
+ *
+ * @acb: Write request
+ * @len: Length in bytes
+ *
+ * This path is taken when writing to previously unallocated clusters.
+ */
+static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverCompletionFunc *cb;
+
+ /* Cancel timer when the first allocating request comes in */
+ if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+ qed_cancel_need_check_timer(s);
+ }
+
+ /* Freeze this request if another allocating write is in progress */
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
+ }
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+ s->allocating_write_reqs_plugged) {
+ return; /* wait for existing request to finish */
+ }
+
+ acb->cur_nclusters = qed_bytes_to_clusters(s,
+ qed_offset_into_cluster(s, acb->cur_pos) + len);
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ if (acb->flags & QED_AIOCB_ZERO) {
+ /* Skip ahead if the clusters are already zero */
+ if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
+ qed_aio_next_io(acb, 0);
+ return;
+ }
+
+ cb = qed_aio_write_zero_cluster;
+ } else {
+ cb = qed_aio_write_prefill;
+ acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+ }
+
+ if (qed_should_set_need_check(s)) {
+ s->header.features |= QED_F_NEED_CHECK;
+ qed_write_header(s, cb, acb);
+ } else {
+ cb(acb, 0);
+ }
+}
+
+/**
+ * Write data cluster in place
+ *
+ * @acb: Write request
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * This path is taken when writing to already allocated clusters.
+ */
+static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
+{
+ /* Allocate buffer for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ struct iovec *iov = acb->qiov->iov;
+
+ if (!iov->iov_base) {
+ iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+ memset(iov->iov_base, 0, iov->iov_len);
+ }
+ }
+
+ /* Calculate the I/O vector */
+ acb->cur_cluster = offset;
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ /* Do the actual write */
+ qed_aio_write_main(acb, 0);
+}
+
+/**
+ * Write data cluster
+ *
+ * @opaque: Write request
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ * or -errno
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ */
+static void qed_aio_write_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
+{
+ QEDAIOCB *acb = opaque;
+
+ trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
+
+ acb->find_cluster_ret = ret;
+
+ switch (ret) {
+ case QED_CLUSTER_FOUND:
+ qed_aio_write_inplace(acb, offset, len);
+ break;
+
+ case QED_CLUSTER_L2:
+ case QED_CLUSTER_L1:
+ case QED_CLUSTER_ZERO:
+ qed_aio_write_alloc(acb, len);
+ break;
+
+ default:
+ qed_aio_complete(acb, ret);
+ break;
+ }
+}
+
+/**
+ * Read data cluster
+ *
+ * @opaque: Read request
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ * or -errno
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ */
+static void qed_aio_read_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverState *bs = acb->common.bs;
+
+ /* Adjust offset into cluster */
+ offset += qed_offset_into_cluster(s, acb->cur_pos);
+
+ trace_qed_aio_read_data(s, acb, ret, offset, len);
+
+ if (ret < 0) {
+ goto err;
+ }
+
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ /* Handle zero cluster and backing file reads */
+ if (ret == QED_CLUSTER_ZERO) {
+ qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
+ qed_aio_next_io(acb, 0);
+ return;
+ } else if (ret != QED_CLUSTER_FOUND) {
+ qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+ qed_aio_next_io, acb);
+ return;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ qed_aio_next_io, acb);
+ return;
+
+err:
+ qed_aio_complete(acb, ret);
+}
+
+/**
+ * Begin next I/O or complete the request
+ */
+static void qed_aio_next_io(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
+ qed_aio_write_data : qed_aio_read_data;
+
+ trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
+
+ /* Handle I/O error */
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ acb->qiov_offset += acb->cur_qiov.size;
+ acb->cur_pos += acb->cur_qiov.size;
+ qemu_iovec_reset(&acb->cur_qiov);
+
+ /* Complete request */
+ if (acb->cur_pos >= acb->end_pos) {
+ qed_aio_complete(acb, 0);
+ return;
+ }
+
+ /* Find next cluster and start I/O */
+ qed_find_cluster(s, &acb->request,
+ acb->cur_pos, acb->end_pos - acb->cur_pos,
+ io_fn, acb);
+}
+
+static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, int flags)
+{
+ QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
+
+ trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
+ opaque, flags);
+
+ acb->flags = flags;
+ acb->finished = NULL;
+ acb->qiov = qiov;
+ acb->qiov_offset = 0;
+ acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+ acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
+ acb->request.l2_table = NULL;
+ qemu_iovec_init(&acb->cur_qiov, qiov->niov);
+
+ /* Start request */
+ qed_aio_next_io(acb, 0);
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, QED_AIOCB_WRITE);
+}
+
+typedef struct {
+ Coroutine *co;
+ int ret;
+ bool done;
+} QEDWriteZeroesCB;
+
+static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
+{
+ QEDWriteZeroesCB *cb = opaque;
+
+ cb->done = true;
+ cb->ret = ret;
+ if (cb->co) {
+ qemu_coroutine_enter(cb->co, NULL);
+ }
+}
+
+static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors)
+{
+ BlockDriverAIOCB *blockacb;
+ BDRVQEDState *s = bs->opaque;
+ QEDWriteZeroesCB cb = { .done = false };
+ QEMUIOVector qiov;
+ struct iovec iov;
+
+ /* Refuse if there are untouched backing file sectors */
+ if (bs->backing_hd) {
+ if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
+ return -ENOTSUP;
+ }
+ if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
+ return -ENOTSUP;
+ }
+ }
+
+ /* Zero writes start without an I/O buffer. If a buffer becomes necessary
+ * then it will be allocated during request processing.
+ */
+ iov.iov_base = NULL;
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
+ qed_co_write_zeroes_cb, &cb,
+ QED_AIOCB_WRITE | QED_AIOCB_ZERO);
+ if (!blockacb) {
+ return -EIO;
+ }
+ if (!cb.done) {
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+ assert(cb.done);
+ return cb.ret;
+}
+
+static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BDRVQEDState *s = bs->opaque;
+ uint64_t old_image_size;
+ int ret;
+
+ if (!qed_is_image_size_valid(offset, s->header.cluster_size,
+ s->header.table_size)) {
+ return -EINVAL;
+ }
+
+ /* Shrinking is currently not supported */
+ if ((uint64_t)offset < s->header.image_size) {
+ return -ENOTSUP;
+ }
+
+ old_image_size = s->header.image_size;
+ s->header.image_size = offset;
+ ret = qed_write_header_sync(s);
+ if (ret < 0) {
+ s->header.image_size = old_image_size;
+ }
+ return ret;
+}
+
+static int64_t bdrv_qed_getlength(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+ return s->header.image_size;
+}
+
+static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ memset(bdi, 0, sizeof(*bdi));
+ bdi->cluster_size = s->header.cluster_size;
+ bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
+ return 0;
+}
+
+static int bdrv_qed_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVQEDState *s = bs->opaque;
+ QEDHeader new_header, le_header;
+ void *buffer;
+ size_t buffer_len, backing_file_len;
+ int ret;
+
+ /* Refuse to set backing filename if unknown compat feature bits are
+ * active. If the image uses an unknown compat feature then we may not
+ * know the layout of data following the header structure and cannot safely
+ * add a new string.
+ */
+ if (backing_file && (s->header.compat_features &
+ ~QED_COMPAT_FEATURE_MASK)) {
+ return -ENOTSUP;
+ }
+
+ memcpy(&new_header, &s->header, sizeof(new_header));
+
+ new_header.features &= ~(QED_F_BACKING_FILE |
+ QED_F_BACKING_FORMAT_NO_PROBE);
+
+ /* Adjust feature flags */
+ if (backing_file) {
+ new_header.features |= QED_F_BACKING_FILE;
+
+ if (qed_fmt_is_raw(backing_fmt)) {
+ new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
+ }
+ }
+
+ /* Calculate new header size */
+ backing_file_len = 0;
+
+ if (backing_file) {
+ backing_file_len = strlen(backing_file);
+ }
+
+ buffer_len = sizeof(new_header);
+ new_header.backing_filename_offset = buffer_len;
+ new_header.backing_filename_size = backing_file_len;
+ buffer_len += backing_file_len;
+
+ /* Make sure we can rewrite header without failing */
+ if (buffer_len > new_header.header_size * new_header.cluster_size) {
+ return -ENOSPC;
+ }
+
+ /* Prepare new header */
+ buffer = g_malloc(buffer_len);
+
+ qed_header_cpu_to_le(&new_header, &le_header);
+ memcpy(buffer, &le_header, sizeof(le_header));
+ buffer_len = sizeof(le_header);
+
+ if (backing_file) {
+ memcpy(buffer + buffer_len, backing_file, backing_file_len);
+ buffer_len += backing_file_len;
+ }
+
+ /* Write new header */
+ ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+ g_free(buffer);
+ if (ret == 0) {
+ memcpy(&s->header, &new_header, sizeof(new_header));
+ }
+ return ret;
+}
+
+static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ bdrv_qed_close(bs);
+ memset(s, 0, sizeof(BDRVQEDState));
+ bdrv_qed_open(bs, NULL, bs->open_flags);
+}
+
+static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ return qed_check(s, result, !!fix);
+}
+
+static QEMUOptionParameter qed_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size (in bytes)"
+ }, {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ }, {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ }, {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = OPT_SIZE,
+ .help = "Cluster size (in bytes)",
+ .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
+ }, {
+ .name = BLOCK_OPT_TABLE_SIZE,
+ .type = OPT_SIZE,
+ .help = "L1/L2 table size (in clusters)"
+ },
+ { /* end of list */ }
+};
+
+static BlockDriver bdrv_qed = {
+ .format_name = "qed",
+ .instance_size = sizeof(BDRVQEDState),
+ .create_options = qed_create_options,
+
+ .bdrv_probe = bdrv_qed_probe,
+ .bdrv_rebind = bdrv_qed_rebind,
+ .bdrv_open = bdrv_qed_open,
+ .bdrv_close = bdrv_qed_close,
+ .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
+ .bdrv_create = bdrv_qed_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
+ .bdrv_make_empty = bdrv_qed_make_empty,
+ .bdrv_aio_readv = bdrv_qed_aio_readv,
+ .bdrv_aio_writev = bdrv_qed_aio_writev,
+ .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
+ .bdrv_truncate = bdrv_qed_truncate,
+ .bdrv_getlength = bdrv_qed_getlength,
+ .bdrv_get_info = bdrv_qed_get_info,
+ .bdrv_change_backing_file = bdrv_qed_change_backing_file,
+ .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
+ .bdrv_check = bdrv_qed_check,
+};
+
+static void bdrv_qed_init(void)
+{
+ bdrv_register(&bdrv_qed);
+}
+
+block_init(bdrv_qed_init);
diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h
new file mode 100644
index 000000000..2b4ddedf3
--- /dev/null
+++ b/contrib/qemu/block/qed.h
@@ -0,0 +1,344 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_QED_H
+#define BLOCK_QED_H
+
+#include "block/block_int.h"
+
+/* The layout of a QED file is as follows:
+ *
+ * +--------+----------+----------+----------+-----+
+ * | header | L1 table | cluster0 | cluster1 | ... |
+ * +--------+----------+----------+----------+-----+
+ *
+ * There is a 2-level pagetable for cluster allocation:
+ *
+ * +----------+
+ * | L1 table |
+ * +----------+
+ * ,------' | '------.
+ * +----------+ | +----------+
+ * | L2 table | ... | L2 table |
+ * +----------+ +----------+
+ * ,------' | '------.
+ * +----------+ | +----------+
+ * | Data | ... | Data |
+ * +----------+ +----------+
+ *
+ * The L1 table is fixed size and always present. L2 tables are allocated on
+ * demand. The L1 table size determines the maximum possible image size; it
+ * can be influenced using the cluster_size and table_size values.
+ *
+ * All fields are little-endian on disk.
+ */
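+
+/* Editor's note (not part of the original QEMU code): with the defaults used
+ * by qed_create() (header_size = 1, cluster_size = 64 KB, table_size = 4,
+ * l1_table_offset = cluster_size), a fresh image starts out as:
+ *
+ * bytes [0, 64K) header cluster (QEDHeader plus backing filename)
+ * bytes [64K, 320K) L1 table (4 clusters, zero-filled at create time)
+ * bytes [320K, ...) L2 tables and data clusters, allocated on demand
+ */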
+
+enum {
+ QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
+
+ /* The image supports a backing file */
+ QED_F_BACKING_FILE = 0x01,
+
+ /* The image needs a consistency check before use */
+ QED_F_NEED_CHECK = 0x02,
+
+ /* The backing file format must not be probed, treat as raw image */
+ QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
+
+ /* Feature bits must be used when the on-disk format changes */
+ QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
+ QED_F_NEED_CHECK |
+ QED_F_BACKING_FORMAT_NO_PROBE,
+ QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */
+ QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */
+
+ /* Data is stored in groups of sectors called clusters. Cluster size must
+ * be large to avoid keeping too much metadata. I/O requests that have
+ * sub-cluster size will require read-modify-write.
+ */
+ QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
+ QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
+ QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
+
+ /* Allocated clusters are tracked using a 2-level pagetable. Table size is
+ * a multiple of clusters so large maximum image sizes can be supported
+ * without jacking up the cluster size too much.
+ */
+ QED_MIN_TABLE_SIZE = 1, /* in clusters */
+ QED_MAX_TABLE_SIZE = 16,
+ QED_DEFAULT_TABLE_SIZE = 4,
+
+ /* Delay to flush and clean image after last allocating write completes */
+ QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
+};
+
+typedef struct {
+ uint32_t magic; /* QED\0 */
+
+ uint32_t cluster_size; /* in bytes */
+ uint32_t table_size; /* for L1 and L2 tables, in clusters */
+ uint32_t header_size; /* in clusters */
+
+ uint64_t features; /* format feature bits */
+ uint64_t compat_features; /* compatible feature bits */
+ uint64_t autoclear_features; /* self-resetting feature bits */
+
+ uint64_t l1_table_offset; /* in bytes */
+ uint64_t image_size; /* total logical image size, in bytes */
+
+ /* if (features & QED_F_BACKING_FILE) */
+ uint32_t backing_filename_offset; /* in bytes from start of header */
+ uint32_t backing_filename_size; /* in bytes */
+} QEDHeader;
+
+typedef struct {
+ uint64_t offsets[0]; /* in bytes */
+} QEDTable;
+
+/* The L2 cache is a simple write-through cache for L2 structures */
+typedef struct CachedL2Table {
+ QEDTable *table;
+ uint64_t offset; /* offset=0 indicates an invalid entry */
+ QTAILQ_ENTRY(CachedL2Table) node;
+ int ref;
+} CachedL2Table;
+
+typedef struct {
+ QTAILQ_HEAD(, CachedL2Table) entries;
+ unsigned int n_entries;
+} L2TableCache;
+
+typedef struct QEDRequest {
+ CachedL2Table *l2_table;
+} QEDRequest;
+
+enum {
+ QED_AIOCB_WRITE = 0x0001, /* read or write? */
+ QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
+};
+
+typedef struct QEDAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ int bh_ret; /* final return status for completion bh */
+ QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
+ int flags; /* QED_AIOCB_* bits ORed together */
+ bool *finished; /* signal for cancel completion */
+ uint64_t end_pos; /* request end on block device, in bytes */
+
+ /* User scatter-gather list */
+ QEMUIOVector *qiov;
+ size_t qiov_offset; /* byte count already processed */
+
+ /* Current cluster scatter-gather list */
+ QEMUIOVector cur_qiov;
+ uint64_t cur_pos; /* position on block device, in bytes */
+ uint64_t cur_cluster; /* cluster offset in image file */
+ unsigned int cur_nclusters; /* number of clusters being accessed */
+ int find_cluster_ret; /* used for L1/L2 update */
+
+ QEDRequest request;
+} QEDAIOCB;
+
+typedef struct {
+ BlockDriverState *bs; /* device */
+ uint64_t file_size; /* length of image file, in bytes */
+
+ QEDHeader header; /* always cpu-endian */
+ QEDTable *l1_table;
+ L2TableCache l2_cache; /* l2 table cache */
+ uint32_t table_nelems;
+ uint32_t l1_shift;
+ uint32_t l2_shift;
+ uint32_t l2_mask;
+
+ /* Allocating write request queue */
+ QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
+ bool allocating_write_reqs_plugged;
+
+ /* Periodic flush and clear need check flag */
+ QEMUTimer *need_check_timer;
+} BDRVQEDState;
+
+enum {
+ QED_CLUSTER_FOUND, /* cluster found */
+ QED_CLUSTER_ZERO, /* zero cluster found */
+ QED_CLUSTER_L2, /* cluster missing in L2 */
+ QED_CLUSTER_L1, /* cluster missing in L1 */
+};
+
+/**
+ * qed_find_cluster() completion callback
+ *
+ * @opaque: User data for completion callback
+ * @ret: QED_CLUSTER_FOUND Success
+ * QED_CLUSTER_ZERO Data cluster is a zero cluster
+ * QED_CLUSTER_L2 Data cluster unallocated in L2
+ * QED_CLUSTER_L1 L2 unallocated in L1
+ * -errno POSIX error occurred
+ * @offset: Data cluster offset
+ * @len: Contiguous bytes starting from cluster offset
+ *
+ * This function is invoked when qed_find_cluster() completes.
+ *
+ * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
+ * in the image file.
+ *
+ * When the cluster is unallocated, ret is QED_CLUSTER_L2 or QED_CLUSTER_L1
+ * for a missing L2 table or L1 table entry, respectively, and len is the
+ * number of contiguous unallocated bytes.
+ */
+typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
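+
+/* Editor's sketch (not part of the original QEMU code): a minimal
+ * QEDFindClusterFunc that only records whether the queried range is backed by
+ * data, mirroring what qed_is_allocated_cb() in qed.c does.
+ */
+static inline void qed_example_find_cluster_cb(void *opaque, int ret,
+ uint64_t offset, size_t len)
+{
+ bool *allocated = opaque;
+
+ *allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+}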
+
+/**
+ * Generic callback for chaining async callbacks
+ */
+typedef struct {
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+} GenericCB;
+
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
+void gencb_complete(void *opaque, int ret);
+
+/**
+ * Header functions
+ */
+int qed_write_header_sync(BDRVQEDState *s);
+
+/**
+ * L2 cache functions
+ */
+void qed_init_l2_cache(L2TableCache *l2_cache);
+void qed_free_l2_cache(L2TableCache *l2_cache);
+CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
+void qed_unref_l2_cache_entry(CachedL2Table *entry);
+CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
+void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
+
+/**
+ * Table I/O functions
+ */
+int qed_read_l1_table_sync(BDRVQEDState *s);
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
+ unsigned int n);
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ uint64_t offset);
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush);
+
+/**
+ * Cluster functions
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+ size_t len, QEDFindClusterFunc *cb, void *opaque);
+
+/**
+ * Consistency check
+ */
+int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
+
+QEDTable *qed_alloc_table(BDRVQEDState *s);
+
+/**
+ * Round down to the start of a cluster
+ */
+static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
+{
+ return offset & ~(uint64_t)(s->header.cluster_size - 1);
+}
+
+static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
+{
+ return offset & (s->header.cluster_size - 1);
+}
+
+static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
+{
+ return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
+ (s->header.cluster_size - 1);
+}
+
+static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
+{
+ return pos >> s->l1_shift;
+}
+
+static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
+{
+ return (pos >> s->l2_shift) & s->l2_mask;
+}
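+
+/* Editor's note (not part of the original QEMU code): with the default
+ * geometry, l2_shift = 16 (64 KB clusters), table_nelems = 32768, so
+ * l2_mask = 0x7fff and l1_shift = 31. A byte position of 0x12345678 then
+ * maps to l1_index = 0, l2_index = 0x1234 and an offset of 0x5678 into the
+ * data cluster.
+ */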
+
+/**
+ * Test if a cluster offset is valid
+ */
+static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
+{
+ uint64_t header_size = (uint64_t)s->header.header_size *
+ s->header.cluster_size;
+
+ if (offset & (s->header.cluster_size - 1)) {
+ return false;
+ }
+ return offset >= header_size && offset < s->file_size;
+}
+
+/**
+ * Test if a table offset is valid
+ */
+static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
+{
+ uint64_t end_offset = offset + (s->header.table_size - 1) *
+ s->header.cluster_size;
+
+ /* Overflow check */
+ if (end_offset <= offset) {
+ return false;
+ }
+
+ return qed_check_cluster_offset(s, offset) &&
+ qed_check_cluster_offset(s, end_offset);
+}
+
+static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
+ uint64_t offset)
+{
+ if (qed_offset_into_cluster(s, offset)) {
+ return false;
+ }
+ return true;
+}
+
+static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
+{
+ if (offset == 0) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool qed_offset_is_zero_cluster(uint64_t offset)
+{
+ if (offset == 1) {
+ return true;
+ }
+ return false;
+}
+
+#endif /* BLOCK_QED_H */
diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c
new file mode 100644
index 000000000..6c6d9deea
--- /dev/null
+++ b/contrib/qemu/block/snapshot.c
@@ -0,0 +1,157 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/snapshot.h"
+#include "block/block_int.h"
+
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name)
+{
+ QEMUSnapshotInfo *sn_tab, *sn;
+ int nb_sns, i, ret;
+
+ ret = -ENOENT;
+ nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+ if (nb_sns < 0) {
+ return ret;
+ }
+ for (i = 0; i < nb_sns; i++) {
+ sn = &sn_tab[i];
+ if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
+ *sn_info = *sn;
+ ret = 0;
+ break;
+ }
+ }
+ g_free(sn_tab);
+ return ret;
+}
+
+int bdrv_can_snapshot(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ return 0;
+ }
+
+ if (!drv->bdrv_snapshot_create) {
+ if (bs->file != NULL) {
+ return bdrv_can_snapshot(bs->file);
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_create) {
+ return drv->bdrv_snapshot_create(bs, sn_info);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_create(bs->file, sn_info);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id)
+{
+ BlockDriver *drv = bs->drv;
+ int ret, open_ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_goto) {
+ return drv->bdrv_snapshot_goto(bs, snapshot_id);
+ }
+
+ if (bs->file) {
+ drv->bdrv_close(bs);
+ ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+ open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
+ if (open_ret < 0) {
+ bdrv_delete(bs->file);
+ bs->drv = NULL;
+ return open_ret;
+ }
+ return ret;
+ }
+
+ return -ENOTSUP;
+}
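+
+/* Editor's sketch (not part of the original QEMU code): typical usage of the
+ * two helpers above, reverting to a named snapshot only when it exists. The
+ * function name is illustrative.
+ */
+static inline int example_revert_to_snapshot(BlockDriverState *bs,
+ const char *name)
+{
+ QEMUSnapshotInfo sn;
+ int ret;
+
+ ret = bdrv_snapshot_find(bs, &sn, name);
+ if (ret < 0) {
+ return ret; /* -ENOENT if no snapshot matches the id or name */
+ }
+ return bdrv_snapshot_goto(bs, sn.id_str);
+}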
+
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_delete) {
+ return drv->bdrv_snapshot_delete(bs, snapshot_id);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_delete(bs->file, snapshot_id);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_list) {
+ return drv->bdrv_snapshot_list(bs, psn_info);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_list(bs->file, psn_info);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (!bs->read_only) {
+ return -EINVAL;
+ }
+ if (drv->bdrv_snapshot_load_tmp) {
+ return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
+ }
+ return -ENOTSUP;
+}
diff --git a/contrib/qemu/config-host.h b/contrib/qemu/config-host.h
new file mode 100644
index 000000000..46b1595a8
--- /dev/null
+++ b/contrib/qemu/config-host.h
@@ -0,0 +1,73 @@
+/* Automatically generated by create_config - do not modify */
+#define CONFIG_QEMU_CONFDIR "/usr/local/etc/qemu"
+#define CONFIG_QEMU_DATADIR "/usr/local/share/qemu"
+#define CONFIG_QEMU_DOCDIR "/usr/local/share/doc/qemu"
+#define CONFIG_QEMU_LOCALSTATEDIR "/usr/local/var"
+#define CONFIG_QEMU_HELPERDIR "/usr/local/libexec"
+#define CONFIG_QEMU_LOCALEDIR "/usr/local/share/locale"
+#define HOST_X86_64 1
+#define CONFIG_QEMU_LDST_OPTIMIZATION 1
+#define CONFIG_POSIX 1
+#define CONFIG_LINUX 1
+#define CONFIG_SLIRP 1
+#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
+#define CONFIG_AUDIO_DRIVERS \
+ &oss_audio_driver,\
+
+#define CONFIG_OSS 1
+#define CONFIG_BDRV_RW_WHITELIST\
+ NULL
+#define CONFIG_BDRV_RO_WHITELIST\
+ NULL
+#define CONFIG_VNC 1
+#define CONFIG_VNC_TLS 1
+#define CONFIG_VNC_SASL 1
+#define CONFIG_VNC_WS 1
+#define CONFIG_FNMATCH 1
+#define CONFIG_UUID 1
+#define CONFIG_XFS 1
+#define QEMU_VERSION "1.5.50"
+#define QEMU_PKGVERSION ""
+#define CONFIG_CURSES 1
+#define CONFIG_UTIMENSAT 1
+#define CONFIG_PIPE2 1
+#define CONFIG_ACCEPT4 1
+#define CONFIG_SPLICE 1
+#define CONFIG_EVENTFD 1
+#define CONFIG_FALLOCATE 1
+#define CONFIG_FALLOCATE_PUNCH_HOLE 1
+#define CONFIG_SYNC_FILE_RANGE 1
+#define CONFIG_FIEMAP 1
+#define CONFIG_DUP3 1
+#define CONFIG_EPOLL 1
+#define CONFIG_EPOLL_CREATE1 1
+#define CONFIG_EPOLL_PWAIT 1
+#define CONFIG_SENDFILE 1
+#define CONFIG_INOTIFY 1
+#define CONFIG_INOTIFY1 1
+#define CONFIG_BYTESWAP_H 1
+#define CONFIG_CURL 1
+#define CONFIG_LINUX_AIO 1
+#define CONFIG_ATTR 1
+#define CONFIG_VHOST_SCSI 1
+#define CONFIG_IOVEC 1
+#define CONFIG_PREADV 1
+#define CONFIG_FDT 1
+#define CONFIG_SIGNALFD 1
+#define CONFIG_FDATASYNC 1
+#define CONFIG_MADVISE 1
+#define CONFIG_POSIX_MADVISE 1
+#define CONFIG_SIGEV_THREAD_ID 1
+#define CONFIG_UNAME_RELEASE ""
+#define CONFIG_QOM_CAST_DEBUG 1
+#define CONFIG_COROUTINE_BACKEND ucontext
+#define CONFIG_OPEN_BY_HANDLE 1
+#define CONFIG_LINUX_MAGIC_H 1
+#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
+#define CONFIG_HAS_ENVIRON 1
+#define CONFIG_CPUID_H 1
+#define CONFIG_INT128 1
+#define CONFIG_VIRTIO_BLK_DATA_PLANE $(CONFIG_VIRTIO)
+#define CONFIG_TRACE_NOP 1
+#define CONFIG_TRACE_FILE trace
+#define CONFIG_TRACE_DEFAULT 1
diff --git a/contrib/qemu/coroutine-ucontext.c b/contrib/qemu/coroutine-ucontext.c
new file mode 100644
index 000000000..4bf2cde27
--- /dev/null
+++ b/contrib/qemu/coroutine-ucontext.c
@@ -0,0 +1,225 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "block/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+ unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+ /** Currently executing coroutine */
+ Coroutine *current;
+
+ /** The default coroutine */
+ CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+ void *p;
+ int i[2];
+};
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ if (!s) {
+ s = g_malloc0(sizeof(*s));
+ s->current = &s->leader.base;
+ pthread_setspecific(thread_state_key, s);
+ }
+ return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+ CoroutineThreadState *s = opaque;
+
+ g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+ int ret;
+
+ ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+ if (ret != 0) {
+ fprintf(stderr, "unable to create leader key: %s\n", strerror(ret));
+ abort();
+ }
+}
+
+static void coroutine_trampoline(int i0, int i1)
+{
+ union cc_arg arg;
+ CoroutineUContext *self;
+ Coroutine *co;
+
+ arg.i[0] = i0;
+ arg.i[1] = i1;
+ self = arg.p;
+ co = &self->base;
+
+ /* Initialize longjmp environment and switch back to the caller */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ ucontext_t old_uc, uc;
+ sigjmp_buf old_env;
+ union cc_arg arg = {0};
+
+ /* The ucontext functions preserve signal masks which incurs a
+ * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
+ * preserve signal masks but only works on the current stack.
+ * Since we need a way to create and switch to a new stack, use
+ * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+ * everything else.
+ */
+
+ if (getcontext(&uc) == -1) {
+ abort();
+ }
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ uc.uc_link = &old_uc;
+ uc.uc_stack.ss_sp = co->stack;
+ uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+ co->valgrind_stack_id =
+ VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+ arg.p = co;
+
+ makecontext(&uc, (void (*)(void))coroutine_trampoline,
+ 2, arg.i[0], arg.i[1]);
+
+ /* swapcontext() in, siglongjmp() back out */
+ if (!sigsetjmp(old_env, 0)) {
+ swapcontext(&old_uc, &uc);
+ }
+ return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+ VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+ valgrind_stack_deregister(co);
+#endif
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ int ret;
+
+ s->current = to_;
+
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+
+ return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ return s && s->current->caller;
+}
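
The backend above uses the ucontext calls only to set up each coroutine's stack; every later switch goes through sigsetjmp()/siglongjmp(), because swapcontext() saves and restores the signal mask with a system call on every switch. The toy program below reproduces that handshake outside QEMU; it is a hedged illustration, not the QEMU API, and (as the file itself notes) it must be built without _FORTIFY_SOURCE so glibc does not abort on the cross-stack longjmp.

/* Minimal ucontext + sigsetjmp coroutine handshake (illustration only). */
#include <stdio.h>
#include <setjmp.h>
#include <ucontext.h>

static sigjmp_buf co_env, caller_env;
static ucontext_t uc, old_uc;
static char stack[64 * 1024];

static void trampoline(void)
{
    /* First entry: remember where the coroutine can be resumed, then hop
     * straight back to the creator (mirrors coroutine_trampoline above). */
    if (!sigsetjmp(co_env, 0)) {
        siglongjmp(caller_env, 1);
    }
    /* Re-entered later through siglongjmp(co_env, ...). */
    printf("hello from the coroutine stack\n");
    siglongjmp(caller_env, 2);
}

int main(void)
{
    getcontext(&uc);
    uc.uc_link = &old_uc;
    uc.uc_stack.ss_sp = stack;
    uc.uc_stack.ss_size = sizeof(stack);
    makecontext(&uc, trampoline, 0);

    /* Creation: swapcontext() in, siglongjmp() back out (qemu_coroutine_new). */
    if (!sigsetjmp(caller_env, 0)) {
        swapcontext(&old_uc, &uc);
    }

    /* Switch: save our position, jump into the coroutine
     * (qemu_coroutine_switch). */
    if (sigsetjmp(caller_env, 0) == 0) {
        siglongjmp(co_env, 1);
    }
    printf("back on the original stack\n");
    return 0;
}
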
diff --git a/contrib/qemu/include/block/aio.h b/contrib/qemu/include/block/aio.h
new file mode 100644
index 000000000..183679374
--- /dev/null
+++ b/contrib/qemu/include/block/aio.h
@@ -0,0 +1,247 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/event_notifier.h"
+
+typedef struct BlockDriverAIOCB BlockDriverAIOCB;
+typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+
+typedef struct AIOCBInfo {
+ void (*cancel)(BlockDriverAIOCB *acb);
+ size_t aiocb_size;
+} AIOCBInfo;
+
+struct BlockDriverAIOCB {
+ const AIOCBInfo *aiocb_info;
+ BlockDriverState *bs;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+};
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qemu_aio_release(void *p);
+
+typedef struct AioHandler AioHandler;
+typedef void QEMUBHFunc(void *opaque);
+typedef void IOHandler(void *opaque);
+
+typedef struct AioContext {
+ GSource source;
+
+ /* The list of registered AIO handlers */
+ QLIST_HEAD(, AioHandler) aio_handlers;
+
+ /* This is a simple lock used to protect the aio_handlers list.
+ * Specifically, it's used to ensure that no callbacks are removed while
+ * we're walking and dispatching callbacks.
+ */
+ int walking_handlers;
+
+ /* Anchor of the list of Bottom Halves belonging to the context */
+ struct QEMUBH *first_bh;
+
+ /* A simple lock used to protect the first_bh list, and ensure that
+ * no callbacks are removed while we're walking and dispatching callbacks.
+ */
+ int walking_bh;
+
+ /* Used for aio_notify. */
+ EventNotifier notifier;
+
+ /* GPollFDs for aio_poll() */
+ GArray *pollfds;
+
+ /* Thread pool for performing work and receiving completion callbacks */
+ struct ThreadPool *thread_pool;
+} AioContext;
+
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
+
+/**
+ * aio_context_new: Allocate a new AioContext.
+ *
+ * An AioContext provides a mini event-loop that can be waited on synchronously.
+ * It also provides bottom halves, a service to execute a piece of code
+ * as soon as possible.
+ */
+AioContext *aio_context_new(void);
+
+/**
+ * aio_context_ref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Add a reference to an AioContext.
+ */
+void aio_context_ref(AioContext *ctx);
+
+/**
+ * aio_context_unref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Drop a reference to an AioContext.
+ */
+void aio_context_unref(AioContext *ctx);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * Bottom halves are lightweight callbacks whose invocation is guaranteed
+ * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
+ * is opaque and must be allocated prior to its use.
+ */
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
+/**
+ * aio_notify: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, aio_notify forces
+ * aio_wait to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_wait again very soon,
+ * or go through another iteration of the GLib main loop. Hence, aio_notify
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling aio_notify is rarely necessary, because for example scheduling
+ * a bottom half calls it already.
+ */
+void aio_notify(AioContext *ctx);
+
+/**
+ * aio_bh_poll: Poll bottom halves for an AioContext.
+ *
+ * These are internal functions used by the QEMU main loop.
+ */
+int aio_bh_poll(AioContext *ctx);
+
+/**
+ * qemu_bh_schedule: Schedule a bottom half.
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked. This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ */
+void qemu_bh_schedule(QEMUBH *bh);
+
+/**
+ * qemu_bh_cancel: Cancel execution of a bottom half.
+ *
+ * Canceling execution of a bottom half undoes the effect of calls to
+ * qemu_bh_schedule without freeing its resources yet. While cancellation
+ * itself is also wait-free and thread-safe, it can of course race with the
+ * loop that executes bottom halves unless you are holding the iothread
+ * mutex. This makes it mostly useless if you are not holding the mutex.
+ *
+ * @bh: The bottom half to be canceled.
+ */
+void qemu_bh_cancel(QEMUBH *bh);
+
+/**
+ * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
+ *
+ * Deleting a bottom half frees the memory that was allocated for it by
+ * qemu_bh_new. It also implies canceling the bottom half if it was
+ * scheduled.
+ *
+ * @bh: The bottom half to be deleted.
+ */
+void qemu_bh_delete(QEMUBH *bh);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_pending(AioContext *ctx);
+
+/* Make progress in completing AIO work. This can issue new pending
+ * aio as a result of executing I/O completion or bh callbacks.
+ *
+ * If there is no pending AIO operation or completion (bottom half),
+ * return false. If there are pending AIO operations or bottom halves,
+ * return true.
+ *
+ * If there are no pending bottom halves, but there are pending AIO
+ * operations, it may not be possible to make any progress without
+ * blocking. If @blocking is true, this function will wait until one
+ * or more AIO events have completed, to ensure something has moved
+ * before returning.
+ */
+bool aio_poll(AioContext *ctx, bool blocking);
+
+#ifdef CONFIG_POSIX
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
+ * be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+void aio_set_fd_handler(AioContext *ctx,
+ int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+/* Register an event notifier and associated callbacks. Behaves very similarly
+ * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
+ * will be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of event_notifier_set_handler.
+ */
+void aio_set_event_notifier(AioContext *ctx,
+ EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+/* Return a GSource that lets the main loop poll the file descriptors attached
+ * to this AioContext.
+ */
+GSource *aio_get_g_source(AioContext *ctx);
+
+/* Return the ThreadPool bound to this AioContext */
+struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+
+/* Functions to operate on the main QEMU AioContext. */
+
+bool qemu_aio_wait(void);
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+#ifdef CONFIG_POSIX
+void qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+#endif
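
For orientation, a hypothetical caller of this API creates a context, attaches a bottom half to it and then spins in aio_poll() until the callback has run. The sketch below uses only functions declared in the header above and assumes the surrounding QEMU build environment; it is not a standalone program.

/* Hypothetical usage sketch for the AioContext / bottom-half API above. */
#include "block/aio.h"

static void done_bh(void *opaque)
{
    bool *done = opaque;
    *done = true;               /* runs from within aio_poll() dispatch */
}

static void wait_for_bh(void)
{
    AioContext *ctx = aio_context_new();
    bool done = false;
    QEMUBH *bh = aio_bh_new(ctx, done_bh, &done);

    qemu_bh_schedule(bh);       /* scheduling also notifies the context */
    while (!done) {
        aio_poll(ctx, true);    /* blocking: dispatch bottom halves and AIO */
    }

    qemu_bh_delete(bh);
    aio_context_unref(ctx);
}
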
diff --git a/contrib/qemu/include/block/block.h b/contrib/qemu/include/block/block.h
new file mode 100644
index 000000000..b6b9014a9
--- /dev/null
+++ b/contrib/qemu/include/block/block.h
@@ -0,0 +1,443 @@
+#ifndef BLOCK_H
+#define BLOCK_H
+
+#include "block/aio.h"
+#include "qemu-common.h"
+#include "qemu/option.h"
+#include "block/coroutine.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi-types.h"
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+} BlockDevOps;
+
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+
+typedef enum {
+ BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
+} BlockErrorAction;
+
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ void *opaque;
+} BDRVReopenState;
+
+
+void bdrv_iostatus_enable(BlockDriverState *bs);
+void bdrv_iostatus_reset(BlockDriverState *bs);
+void bdrv_iostatus_disable(BlockDriverState *bs);
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
+void bdrv_info_print(Monitor *mon, const QObject *data);
+void bdrv_info(Monitor *mon, QObject **ret_data);
+void bdrv_stats_print(Monitor *mon, const QObject *data);
+void bdrv_info_stats(Monitor *mon, QObject **ret_data);
+
+/* disk I/O throttling */
+void bdrv_io_limits_enable(BlockDriverState *bs);
+void bdrv_io_limits_disable(BlockDriverState *bs);
+bool bdrv_io_limits_enabled(BlockDriverState *bs);
+
+void bdrv_init(void);
+void bdrv_init_with_whitelist(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix);
+BlockDriver *bdrv_find_format(const char *format_name);
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool readonly);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options);
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
+BlockDriverState *bdrv_new(const char *device_name);
+void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_delete(BlockDriverState *bs);
+int bdrv_parse_cache_flags(const char *mode, int *flags);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options);
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
+int bdrv_attach_dev(BlockDriverState *bs, void *dev);
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
+void bdrv_detach_dev(BlockDriverState *bs, void *dev);
+void *bdrv_get_attached_dev(BlockDriverState *bs);
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+ void *opaque);
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force);
+bool bdrv_dev_has_removable_media(BlockDriverState *bs);
+bool bdrv_dev_is_tray_open(BlockDriverState *bs);
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+int bdrv_get_backing_file_depth(BlockDriverState *bs);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_commit_all(void);
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+/* async block I/O */
+typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
+ int sector_num);
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+
+typedef struct BlockRequest {
+ /* Fields to be filled by multiwrite caller */
+ int64_t sector;
+ int nb_sectors;
+ QEMUIOVector *qiov;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+
+ /* Filled by multiwrite implementation */
+ int error;
+} BlockRequest;
+
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs);
+
+/* sg packet commands */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+/* Invalidate any cached metadata used by image formats */
+void bdrv_invalidate_cache(BlockDriverState *bs);
+void bdrv_invalidate_cache_all(void);
+
+void bdrv_clear_incoming_migration_all(void);
+
+/* Ensure contents are flushed to disk. */
+int bdrv_flush(BlockDriverState *bs);
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all(void);
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_sg(BlockDriverState *bs);
+int bdrv_enable_write_cache(BlockDriverState *bs);
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_media_changed(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+BlockDriverState *bdrv_find(const char *name);
+BlockDriverState *bdrv_next(BlockDriverState *bs);
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
+ void *opaque);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_key_required(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+int bdrv_query_missing_keys(void);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque);
+const char *bdrv_get_device_name(BlockDriverState *bs);
+int bdrv_get_flags(BlockDriverState *bs);
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+void bdrv_get_full_backing_filename(BlockDriverState *bs,
+ char *dest, size_t sz);
+int bdrv_is_snapshot(BlockDriverState *bs);
+
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename);
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet);
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+struct HBitmapIter;
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
+int64_t bdrv_get_dirty_count(BlockDriverState *bs);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use);
+int bdrv_in_use(BlockDriverState *bs);
+
+#ifdef CONFIG_LINUX_AIO
+int raw_get_aio_fd(BlockDriverState *bs);
+#else
+static inline int raw_get_aio_fd(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+#endif
+
+enum BlockAcctType {
+ BDRV_ACCT_READ,
+ BDRV_ACCT_WRITE,
+ BDRV_ACCT_FLUSH,
+ BDRV_MAX_IOTYPE,
+};
+
+typedef struct BlockAcctCookie {
+ int64_t bytes;
+ int64_t start_time_ns;
+ enum BlockAcctType type;
+} BlockAcctCookie;
+
+void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
+ int64_t bytes, enum BlockAcctType type);
+void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
+
+typedef enum {
+ BLKDBG_L1_UPDATE,
+
+ BLKDBG_L1_GROW_ALLOC_TABLE,
+ BLKDBG_L1_GROW_WRITE_TABLE,
+ BLKDBG_L1_GROW_ACTIVATE_TABLE,
+
+ BLKDBG_L2_LOAD,
+ BLKDBG_L2_UPDATE,
+ BLKDBG_L2_UPDATE_COMPRESSED,
+ BLKDBG_L2_ALLOC_COW_READ,
+ BLKDBG_L2_ALLOC_WRITE,
+
+ BLKDBG_READ_AIO,
+ BLKDBG_READ_BACKING_AIO,
+ BLKDBG_READ_COMPRESSED,
+
+ BLKDBG_WRITE_AIO,
+ BLKDBG_WRITE_COMPRESSED,
+
+ BLKDBG_VMSTATE_LOAD,
+ BLKDBG_VMSTATE_SAVE,
+
+ BLKDBG_COW_READ,
+ BLKDBG_COW_WRITE,
+
+ BLKDBG_REFTABLE_LOAD,
+ BLKDBG_REFTABLE_GROW,
+
+ BLKDBG_REFBLOCK_LOAD,
+ BLKDBG_REFBLOCK_UPDATE,
+ BLKDBG_REFBLOCK_UPDATE_PART,
+ BLKDBG_REFBLOCK_ALLOC,
+ BLKDBG_REFBLOCK_ALLOC_HOOKUP,
+ BLKDBG_REFBLOCK_ALLOC_WRITE,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
+ BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
+
+ BLKDBG_CLUSTER_ALLOC,
+ BLKDBG_CLUSTER_ALLOC_BYTES,
+ BLKDBG_CLUSTER_FREE,
+
+ BLKDBG_FLUSH_TO_OS,
+ BLKDBG_FLUSH_TO_DISK,
+
+ BLKDBG_EVENT_MAX,
+} BlkDebugEvent;
+
+#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+#endif
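
As a quick reference for the declarations above, a hedged sketch of the usual init/open/read/close sequence follows; the device name, file name and minimal error handling are arbitrary choices, and the code assumes the QEMU build environment rather than standing alone.

/* Hypothetical usage sketch for the public block layer API above. */
#include "block/block.h"

static int dump_first_sector(const char *filename)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    BlockDriverState *bs;
    int ret;

    bdrv_init();                                   /* register built-in drivers */
    bs = bdrv_new("probe");
    ret = bdrv_open(bs, filename, NULL, 0, NULL);  /* probe the format, read-only */
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }

    ret = bdrv_read(bs, 0, buf, 1);                /* sector 0, one sector */
    bdrv_close(bs);
    bdrv_delete(bs);
    return ret;
}
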
diff --git a/contrib/qemu/include/block/block_int.h b/contrib/qemu/include/block/block_int.h
new file mode 100644
index 000000000..c6ac871e2
--- /dev/null
+++ b/contrib/qemu/include/block/block_int.h
@@ -0,0 +1,421 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_H
+#define BLOCK_INT_H
+
+#include "block/block.h"
+#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+#include "qemu/timer.h"
+#include "qapi-types.h"
+#include "qapi/qmp/qerror.h"
+#include "monitor/monitor.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+
+#define BLOCK_FLAG_ENCRYPT 1
+#define BLOCK_FLAG_COMPAT6 4
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE 1
+#define BLOCK_IO_LIMIT_TOTAL 2
+
+#define BLOCK_IO_SLICE_TIME 100000000
+#define NANOSECONDS_PER_SECOND 1000000000.0
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+ bool is_write;
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+} BdrvTrackedRequest;
+
+
+typedef struct BlockIOLimit {
+ int64_t bps[3];
+ int64_t iops[3];
+} BlockIOLimit;
+
+typedef struct BlockIOBaseValue {
+ uint64_t bytes[2];
+ uint64_t ios[2];
+} BlockIOBaseValue;
+
+struct BlockDriver {
+ const char *format_name;
+ int instance_size;
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+ int (*bdrv_probe_device)(const char *filename);
+
+ /* Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
+
+ /* For handling image reopen for split or non-split files */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags);
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags);
+ int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+ int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+ void (*bdrv_close)(BlockDriverState *bs);
+ void (*bdrv_rebind)(BlockDriverState *bs);
+ int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
+ int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+ /* aio */
+ BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void (*bdrv_invalidate_cache)(BlockDriverState *bs);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example raw-posix calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ const char *protocol_name;
+ int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_name);
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos);
+ int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* removable device specific */
+ int (*bdrv_is_inserted)(BlockDriverState *bs);
+ int (*bdrv_media_changed)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
+ BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ /* List of options for creating images, terminated by name == NULL */
+ QEMUOptionParameter *create_options;
+
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
+
+ /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ QLIST_ENTRY(BlockDriver) list;
+};
+
+/*
+ * Note: the function bdrv_append() copies and swaps contents of
+ * BlockDriverStates, so if you add new fields to this struct, please
+ * inspect bdrv_append() to determine if the new fields need to be
+ * copied as well.
+ */
+struct BlockDriverState {
+ int64_t total_sectors; /* if we are reading a disk image, give its
+ size in sectors */
+ int read_only; /* if true, the media is read only */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ int encrypted; /* if true, the media is encrypted */
+ int valid_key; /* if true, a valid encryption key has been set */
+ int sg; /* if true, the device is a /dev/sg* */
+ int copy_on_read; /* if true, copy read backing sectors into image
+ note this is a reference count */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ void *dev; /* attached device model, if any */
+ /* TODO change to DeviceState when all users are qdevified */
+ const BlockDevOps *dev_ops;
+ void *dev_opaque;
+
+ char filename[1024];
+ char backing_file[1024]; /* if non zero, the image is a diff of
+ this file image */
+ char backing_format[16]; /* if non-zero and backing_file exists */
+ int is_temporary;
+
+ BlockDriverState *backing_hd;
+ BlockDriverState *file;
+
+ NotifierList close_notifiers;
+
+ /* Callback before write request is processed */
+ NotifierWithReturnList before_write_notifiers;
+
+ /* number of in-flight copy-on-read requests */
+ unsigned int copy_on_read_in_flight;
+
+ /* the time for latest disk I/O */
+ int64_t slice_start;
+ int64_t slice_end;
+ BlockIOLimit io_limits;
+ BlockIOBaseValue slice_submitted;
+ CoQueue throttled_reqs;
+ QEMUTimer *block_timer;
+ bool io_limits_enabled;
+
+ /* I/O stats (display with "info blockstats"). */
+ uint64_t nr_bytes[BDRV_MAX_IOTYPE];
+ uint64_t nr_ops[BDRV_MAX_IOTYPE];
+ uint64_t total_time_ns[BDRV_MAX_IOTYPE];
+ uint64_t wr_highest_sector;
+
+ /* Whether the disk can expand beyond total_sectors */
+ int growable;
+
+ /* the memory alignment required for the buffers handled by this driver */
+ int buffer_alignment;
+
+ /* do we need to tell the guest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* NOTE: the following infos are only hints for real hardware
+ drivers. They are not used by the block driver */
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+ char device_name[32];
+ HBitmap *dirty_bitmap;
+ int in_use; /* users other than guest access, eg. block migration */
+ QTAILQ_ENTRY(BlockDriverState) list;
+
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+ /* long-running background operation */
+ BlockJob *job;
+
+ QDict *options;
+};
+
+int get_tmp_filename(char *filename, int size);
+
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits);
+
+/**
+ * bdrv_add_before_write_notifier:
+ *
+ * Register a callback that is invoked before write requests are processed but
+ * after any throttling or waiting for overlapping requests.
+ */
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier);
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read);
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
+void stream_start(BlockDriverState *bs, BlockDriverState *base,
+ const char *base_id, int64_t speed, BlockdevOnError on_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @target until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, int64_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * backup_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Start a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb, void *opaque,
+ Error **errp);
+
+#endif /* BLOCK_INT_H */
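
To show how the hooks in struct BlockDriver fit together, here is a hypothetical skeleton of a trivial driver; the "nulldev" name and all of its callbacks are invented, and only members that exist in the structure above are filled in.

/* Hypothetical driver skeleton against struct BlockDriver above. */
#include "block/block_int.h"

typedef struct {
    int64_t length;                       /* per-instance state, see instance_size */
} BDRVNullState;

static int nulldev_open(BlockDriverState *bs, QDict *options, int flags)
{
    BDRVNullState *s = bs->opaque;        /* allocated by the generic layer */
    s->length = 1 << 30;                  /* pretend to be a 1 GiB device */
    return 0;
}

static void nulldev_close(BlockDriverState *bs)
{
}

static int64_t nulldev_getlength(BlockDriverState *bs)
{
    BDRVNullState *s = bs->opaque;
    return s->length;
}

static int coroutine_fn nulldev_co_readv(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *qiov)
{
    return 0;                             /* reads succeed, data left untouched */
}

static BlockDriver bdrv_nulldev = {
    .format_name    = "nulldev",
    .instance_size  = sizeof(BDRVNullState),
    .bdrv_open      = nulldev_open,
    .bdrv_close     = nulldev_close,
    .bdrv_getlength = nulldev_getlength,
    .bdrv_co_readv  = nulldev_co_readv,
};

static void nulldev_register(void)
{
    bdrv_register(&bdrv_nulldev);         /* declared in block.h */
}
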
diff --git a/contrib/qemu/include/block/blockjob.h b/contrib/qemu/include/block/blockjob.h
new file mode 100644
index 000000000..c290d07bb
--- /dev/null
+++ b/contrib/qemu/include/block/blockjob.h
@@ -0,0 +1,278 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block/block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ const char *job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+ /** Optional callback for job types that need to forward I/O status reset */
+ void (*iostatus_reset)(BlockJob *job);
+
+ /**
+ * Optional callback for job types whose completion must be triggered
+ * manually.
+ */
+ void (*complete)(BlockJob *job, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
+
+ /**
+ * The coroutine that executes the job. If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+ Coroutine *co;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has been cancelled, it should only yield
+ * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+ */
+ bool cancelled;
+
+ /**
+ * Set to true if the job is either paused, or will pause itself
+ * as soon as possible (if busy == true).
+ */
+ bool paused;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and the job has yielded on any condition
+ * that is not detected by #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Status that is published by the query-block-jobs QMP API */
+ BlockDeviceIoStatus iostatus;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block device on which the job operates.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed by the time @cb is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds. Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_completed:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_completed(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * qobject_from_block_job:
+ * @job: The job whose information is requested.
+ *
+ * Return a QDict corresponding to @job's query-block-jobs entry.
+ */
+QObject *qobject_from_block_job(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_ready(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job. The completion callback is called
+ * before the function returns. The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM. Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error);
+#endif
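
Finally, a hypothetical sketch of the smallest job type one could build on these declarations; real jobs (stream, commit, mirror, backup) follow the same create-then-enter-coroutine pattern, but the "nop" names and the trivial coroutine body here are invented for illustration.

/* Hypothetical do-nothing block job built on the declarations above. */
#include "block/blockjob.h"

typedef struct {
    BlockJob common;                      /* must be the first member */
} NopBlockJob;

static const BlockJobType nop_job_type = {
    .instance_size = sizeof(NopBlockJob),
    .job_type      = "nop",
};

static void coroutine_fn nop_job_run(void *opaque)
{
    NopBlockJob *job = opaque;

    /* A real job would loop over the image, checking
     * block_job_is_cancelled() between chunks; this one finishes at once. */
    block_job_completed(&job->common, 0);
}

static void nop_job_start(BlockDriverState *bs,
                          BlockDriverCompletionFunc *cb, void *opaque,
                          Error **errp)
{
    NopBlockJob *job = block_job_create(&nop_job_type, bs, 0, cb, opaque, errp);

    if (!job) {
        return;
    }
    job->common.co = qemu_coroutine_create(nop_job_run);
    qemu_coroutine_enter(job->common.co, job);
}
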
diff --git a/contrib/qemu/include/block/coroutine.h b/contrib/qemu/include/block/coroutine.h
new file mode 100644
index 000000000..377805a3b
--- /dev/null
+++ b/contrib/qemu/include/block/coroutine.h
@@ -0,0 +1,218 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading. These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions. In the future it would be nice to enable compiler or
+ * static checker support for catching such errors. This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ * static void coroutine_fn foo(void) {
+ * ....
+ * }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time. It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context. Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+ QTAILQ_HEAD(, Coroutine) entries;
+ AioContext *ctx;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the head of the CoQueue and transfers
+ * control to the caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
+ */
+void qemu_co_queue_restart_all(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+ bool locked;
+ CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+typedef struct CoRwlock {
+ bool writer;
+ int reader;
+ CoQueue queue;
+} CoRwlock;
+
+/**
+ * Initialises a CoRwlock. This must be called before any other operation
+ * is used on the CoRwlock.
+ */
+void qemu_co_rwlock_init(CoRwlock *lock);
+
+/**
+ * Read locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel writer, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_rdlock(CoRwlock *lock);
+
+/**
+ * Write locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel reader, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_wrlock(CoRwlock *lock);
+
+/**
+ * Unlocks the read/write lock and schedules the next coroutine that was
+ * waiting for this lock to be run.
+ */
+void qemu_co_rwlock_unlock(CoRwlock *lock);
+
+/**
+ * Yield the coroutine for a given duration
+ *
+ * Note this function uses timers and hence only works when a main loop is in
+ * use. See main-loop.h and do not use from qemu-tool programs.
+ */
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
+
+/**
+ * Yield until a file descriptor becomes readable
+ *
+ * Note that this function clobbers the handlers for the file descriptor.
+ */
+void coroutine_fn yield_until_fd_readable(int fd);
+#endif /* QEMU_COROUTINE_H */
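Editorial note, not part of the patch: a small usage sketch of the API above, assuming the QEMU main loop is available. It uses only functions declared in this header (qemu_coroutine_create/enter/yield and the CoMutex helpers).

static CoMutex lock;

static void coroutine_fn worker(void *opaque)
{
    int *counter = opaque;

    qemu_co_mutex_lock(&lock);       /* may yield if another coroutine holds it */
    (*counter)++;
    qemu_co_mutex_unlock(&lock);

    qemu_coroutine_yield();          /* give control back to the caller once */
    /* returning here destroys the coroutine automatically */
}

static void run_example(void)
{
    static int counter;
    Coroutine *co;

    qemu_co_mutex_init(&lock);
    co = qemu_coroutine_create(worker);
    qemu_coroutine_enter(co, &counter);  /* runs until the yield above */
    qemu_coroutine_enter(co, NULL);      /* resumes after the yield, then terminates */
}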
diff --git a/contrib/qemu/include/block/coroutine_int.h b/contrib/qemu/include/block/coroutine_int.h
new file mode 100644
index 000000000..f133d65af
--- /dev/null
+++ b/contrib/qemu/include/block/coroutine_int.h
@@ -0,0 +1,53 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+
+typedef enum {
+ COROUTINE_YIELD = 1,
+ COROUTINE_TERMINATE = 2,
+} CoroutineAction;
+
+struct Coroutine {
+ CoroutineEntry *entry;
+ void *entry_arg;
+ Coroutine *caller;
+ QSLIST_ENTRY(Coroutine) pool_next;
+
+ /* Coroutines that should be woken up when we yield or terminate */
+ QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
+ QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+ CoroutineAction action);
+void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
+
+#endif
diff --git a/contrib/qemu/include/block/snapshot.h b/contrib/qemu/include/block/snapshot.h
new file mode 100644
index 000000000..eaf61f032
--- /dev/null
+++ b/contrib/qemu/include/block/snapshot.h
@@ -0,0 +1,53 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SNAPSHOT_H
+#define SNAPSHOT_H
+
+#include "qemu-common.h"
+
+typedef struct QEMUSnapshotInfo {
+ char id_str[128]; /* unique snapshot id */
+ /* the following fields are informative. They are not needed for
+ the consistency of the snapshot */
+ char name[256]; /* user chosen name */
+ uint64_t vm_state_size; /* VM state info size */
+ uint32_t date_sec; /* UTC date of the snapshot */
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec; /* VM clock relative to boot */
+} QEMUSnapshotInfo;
+
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name);
+int bdrv_can_snapshot(BlockDriverState *bs);
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id);
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name);
+#endif
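Editorial note, not part of the patch: a hedged sketch of how these snapshot helpers are typically chained. `bs` is assumed to be an already-opened BlockDriverState, the snapshot id/name are made up, and freeing the list returned by bdrv_snapshot_list() with g_free() is an assumption about its allocation convention, not something this header states.

static int create_and_list_snapshots(BlockDriverState *bs)
{
    QEMUSnapshotInfo sn = { .id_str = "1", .name = "before-upgrade" };
    QEMUSnapshotInfo *list = NULL;
    int i, count;

    if (!bdrv_can_snapshot(bs)) {
        return -1;                           /* driver does not support snapshots */
    }
    if (bdrv_snapshot_create(bs, &sn) < 0) {
        return -1;
    }

    count = bdrv_snapshot_list(bs, &list);   /* number of snapshots, or negative error */
    if (count < 0) {
        return count;
    }
    for (i = 0; i < count; i++) {
        /* inspect list[i].name, list[i].vm_state_size, ... */
    }
    g_free(list);                            /* assumed allocation convention */
    return 0;
}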
diff --git a/contrib/qemu/include/config.h b/contrib/qemu/include/config.h
new file mode 100644
index 000000000..e20f78696
--- /dev/null
+++ b/contrib/qemu/include/config.h
@@ -0,0 +1,2 @@
+#include "config-host.h"
+#include "config-target.h"
diff --git a/contrib/qemu/include/exec/cpu-common.h b/contrib/qemu/include/exec/cpu-common.h
new file mode 100644
index 000000000..e4996e19c
--- /dev/null
+++ b/contrib/qemu/include/exec/cpu-common.h
@@ -0,0 +1,124 @@
+#ifndef CPU_COMMON_H
+#define CPU_COMMON_H 1
+
+/* CPU interfaces that are target independent. */
+
+#ifndef CONFIG_USER_ONLY
+#include "exec/hwaddr.h"
+#endif
+
+#ifndef NEED_CPU_H
+#include "exec/poison.h"
+#endif
+
+#include "qemu/bswap.h"
+#include "qemu/queue.h"
+
+/**
+ * CPUListState:
+ * @cpu_fprintf: Print function.
+ * @file: File to print to using @cpu_fprintf.
+ *
+ * State commonly used for iterating over CPU models.
+ */
+typedef struct CPUListState {
+ fprintf_function cpu_fprintf;
+ FILE *file;
+} CPUListState;
+
+#if !defined(CONFIG_USER_ONLY)
+
+enum device_endian {
+ DEVICE_NATIVE_ENDIAN,
+ DEVICE_BIG_ENDIAN,
+ DEVICE_LITTLE_ENDIAN,
+};
+
+/* address in the RAM (different from a physical address) */
+#if defined(CONFIG_XEN_BACKEND)
+typedef uint64_t ram_addr_t;
+# define RAM_ADDR_MAX UINT64_MAX
+# define RAM_ADDR_FMT "%" PRIx64
+#else
+typedef uintptr_t ram_addr_t;
+# define RAM_ADDR_MAX UINTPTR_MAX
+# define RAM_ADDR_FMT "%" PRIxPTR
+#endif
+
+/* memory API */
+
+typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
+
+void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+/* This should not be used by devices. */
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
+
+void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
+ int len, int is_write);
+static inline void cpu_physical_memory_read(hwaddr addr,
+ void *buf, int len)
+{
+ cpu_physical_memory_rw(addr, buf, len, 0);
+}
+static inline void cpu_physical_memory_write(hwaddr addr,
+ const void *buf, int len)
+{
+ cpu_physical_memory_rw(addr, (void *)buf, len, 1);
+}
+void *cpu_physical_memory_map(hwaddr addr,
+ hwaddr *plen,
+ int is_write);
+void cpu_physical_memory_unmap(void *buffer, hwaddr len,
+ int is_write, hwaddr access_len);
+void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
+
+bool cpu_physical_memory_is_io(hwaddr phys_addr);
+
+/* Coalesced MMIO regions are areas where write operations can be reordered.
+ * This usually implies that write operations are side-effect free. This allows
+ * batching which can make a major impact on performance when using
+ * virtualization.
+ */
+void qemu_flush_coalesced_mmio_buffer(void);
+
+uint32_t ldub_phys(hwaddr addr);
+uint32_t lduw_le_phys(hwaddr addr);
+uint32_t lduw_be_phys(hwaddr addr);
+uint32_t ldl_le_phys(hwaddr addr);
+uint32_t ldl_be_phys(hwaddr addr);
+uint64_t ldq_le_phys(hwaddr addr);
+uint64_t ldq_be_phys(hwaddr addr);
+void stb_phys(hwaddr addr, uint32_t val);
+void stw_le_phys(hwaddr addr, uint32_t val);
+void stw_be_phys(hwaddr addr, uint32_t val);
+void stl_le_phys(hwaddr addr, uint32_t val);
+void stl_be_phys(hwaddr addr, uint32_t val);
+void stq_le_phys(hwaddr addr, uint64_t val);
+void stq_be_phys(hwaddr addr, uint64_t val);
+
+#ifdef NEED_CPU_H
+uint32_t lduw_phys(hwaddr addr);
+uint32_t ldl_phys(hwaddr addr);
+uint64_t ldq_phys(hwaddr addr);
+void stl_phys_notdirty(hwaddr addr, uint32_t val);
+void stw_phys(hwaddr addr, uint32_t val);
+void stl_phys(hwaddr addr, uint32_t val);
+void stq_phys(hwaddr addr, uint64_t val);
+#endif
+
+void cpu_physical_memory_write_rom(hwaddr addr,
+ const uint8_t *buf, int len);
+
+extern struct MemoryRegion io_mem_rom;
+extern struct MemoryRegion io_mem_notdirty;
+
+typedef void (RAMBlockIterFunc)(void *host_addr,
+ ram_addr_t offset, ram_addr_t length, void *opaque);
+
+void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+
+#endif
+
+#endif /* !CPU_COMMON_H */
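Editorial note, not part of the patch: a short sketch of the guest-RAM access helpers above. It only makes sense inside a QEMU build with guest memory set up, and the addresses are purely illustrative.

static void copy_guest_word(hwaddr src, hwaddr dst)
{
    uint32_t word;

    /* Read 4 bytes of guest-physical memory into a host buffer... */
    cpu_physical_memory_read(src, &word, sizeof(word));

    /* ...and write them back to a different guest-physical address. */
    cpu_physical_memory_write(dst, &word, sizeof(word));
}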
diff --git a/contrib/qemu/include/exec/hwaddr.h b/contrib/qemu/include/exec/hwaddr.h
new file mode 100644
index 000000000..c9eb78fba
--- /dev/null
+++ b/contrib/qemu/include/exec/hwaddr.h
@@ -0,0 +1,20 @@
+/* Define hwaddr if it exists. */
+
+#ifndef HWADDR_H
+#define HWADDR_H
+
+#define HWADDR_BITS 64
+/* hwaddr is the type of a physical address (its size can
+ be different from 'target_ulong'). */
+
+typedef uint64_t hwaddr;
+#define HWADDR_MAX UINT64_MAX
+#define TARGET_FMT_plx "%016" PRIx64
+#define HWADDR_PRId PRId64
+#define HWADDR_PRIi PRIi64
+#define HWADDR_PRIo PRIo64
+#define HWADDR_PRIu PRIu64
+#define HWADDR_PRIx PRIx64
+#define HWADDR_PRIX PRIX64
+
+#endif
diff --git a/contrib/qemu/include/exec/poison.h b/contrib/qemu/include/exec/poison.h
new file mode 100644
index 000000000..2341a7504
--- /dev/null
+++ b/contrib/qemu/include/exec/poison.h
@@ -0,0 +1,63 @@
+/* Poison identifiers that should not be used when building
+ target independent device code. */
+
+#ifndef HW_POISON_H
+#define HW_POISON_H
+#ifdef __GNUC__
+
+#pragma GCC poison TARGET_I386
+#pragma GCC poison TARGET_X86_64
+#pragma GCC poison TARGET_ALPHA
+#pragma GCC poison TARGET_ARM
+#pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_LM32
+#pragma GCC poison TARGET_M68K
+#pragma GCC poison TARGET_MIPS
+#pragma GCC poison TARGET_MIPS64
+#pragma GCC poison TARGET_OPENRISC
+#pragma GCC poison TARGET_PPC
+#pragma GCC poison TARGET_PPCEMB
+#pragma GCC poison TARGET_PPC64
+#pragma GCC poison TARGET_ABI32
+#pragma GCC poison TARGET_SH4
+#pragma GCC poison TARGET_SPARC
+#pragma GCC poison TARGET_SPARC64
+
+#pragma GCC poison TARGET_WORDS_BIGENDIAN
+#pragma GCC poison BSWAP_NEEDED
+
+#pragma GCC poison TARGET_LONG_BITS
+#pragma GCC poison TARGET_FMT_lx
+#pragma GCC poison TARGET_FMT_ld
+
+#pragma GCC poison TARGET_PAGE_SIZE
+#pragma GCC poison TARGET_PAGE_MASK
+#pragma GCC poison TARGET_PAGE_BITS
+#pragma GCC poison TARGET_PAGE_ALIGN
+
+#pragma GCC poison CPUArchState
+#pragma GCC poison env
+
+#pragma GCC poison lduw_phys
+#pragma GCC poison ldl_phys
+#pragma GCC poison ldq_phys
+#pragma GCC poison stl_phys_notdirty
+#pragma GCC poison stw_phys
+#pragma GCC poison stl_phys
+#pragma GCC poison stq_phys
+
+#pragma GCC poison CPU_INTERRUPT_HARD
+#pragma GCC poison CPU_INTERRUPT_EXITTB
+#pragma GCC poison CPU_INTERRUPT_HALT
+#pragma GCC poison CPU_INTERRUPT_DEBUG
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
+
+#endif
+#endif
diff --git a/contrib/qemu/include/fpu/softfloat.h b/contrib/qemu/include/fpu/softfloat.h
new file mode 100644
index 000000000..f3927e241
--- /dev/null
+++ b/contrib/qemu/include/fpu/softfloat.h
@@ -0,0 +1,641 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
+#include <sunmath.h>
+#endif
+
+#include <inttypes.h>
+#include "config-host.h"
+#include "qemu/osdep.h"
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified. For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+typedef uint8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef unsigned int uint32;
+typedef signed int int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+#define LIT64( a ) a##LL
+#define INLINE static inline
+
+#define STATUS_PARAM , float_status *status
+#define STATUS(field) status->field
+#define STATUS_VAR , status
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {
+ float_relation_less = -1,
+ float_relation_equal = 0,
+ float_relation_greater = 1,
+ float_relation_unordered = 2
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+/* Use structures for soft-float types. This prevents accidentally mixing
+ them with native int/float types. A sufficiently clever compiler and
+ sane ABI should be able to see through these structs. However
+ x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
+//#define USE_SOFTFLOAT_STRUCT_TYPES
+#ifdef USE_SOFTFLOAT_STRUCT_TYPES
+typedef struct {
+ uint16_t v;
+} float16;
+#define float16_val(x) (((float16)(x)).v)
+#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
+#define const_float16(x) { x }
+typedef struct {
+ uint32_t v;
+} float32;
+/* The cast ensures an error if the wrong type is passed. */
+#define float32_val(x) (((float32)(x)).v)
+#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
+#define const_float32(x) { x }
+typedef struct {
+ uint64_t v;
+} float64;
+#define float64_val(x) (((float64)(x)).v)
+#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
+#define const_float64(x) { x }
+#else
+typedef uint16_t float16;
+typedef uint32_t float32;
+typedef uint64_t float64;
+#define float16_val(x) (x)
+#define float32_val(x) (x)
+#define float64_val(x) (x)
+#define make_float16(x) (x)
+#define make_float32(x) (x)
+#define make_float64(x) (x)
+#define const_float16(x) (x)
+#define const_float32(x) (x)
+#define const_float64(x) (x)
+#endif
+typedef struct {
+ uint64_t low;
+ uint16_t high;
+} floatx80;
+#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
+#define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
+typedef struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint64_t high, low;
+#else
+ uint64_t low, high;
+#endif
+} float128;
+#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
+#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_tininess_after_rounding = 0,
+ float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_round_nearest_even = 0,
+ float_round_down = 1,
+ float_round_up = 2,
+ float_round_to_zero = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum {
+ float_flag_invalid = 1,
+ float_flag_divbyzero = 4,
+ float_flag_overflow = 8,
+ float_flag_underflow = 16,
+ float_flag_inexact = 32,
+ float_flag_input_denormal = 64,
+ float_flag_output_denormal = 128
+};
+
+typedef struct float_status {
+ signed char float_detect_tininess;
+ signed char float_rounding_mode;
+ signed char float_exception_flags;
+ signed char floatx80_rounding_precision;
+ /* should denormalised results go to zero and set the inexact flag? */
+ flag flush_to_zero;
+ /* should denormalised inputs go to zero and set the input_denormal flag? */
+ flag flush_inputs_to_zero;
+ flag default_nan_mode;
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+void set_float_exception_flags(int val STATUS_PARAM);
+INLINE void set_float_detect_tininess(int val STATUS_PARAM)
+{
+ STATUS(float_detect_tininess) = val;
+}
+INLINE void set_flush_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_to_zero) = val;
+}
+INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_inputs_to_zero) = val;
+}
+INLINE void set_default_nan_mode(flag val STATUS_PARAM)
+{
+ STATUS(default_nan_mode) = val;
+}
+INLINE int get_float_exception_flags(float_status *status)
+{
+ return STATUS(float_exception_flags);
+}
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 flags STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Options to indicate which negations to perform in float*_muladd().
+| Using these differs from negating an input or output before calling
+| the muladd function: with these options a NaN doesn't have its
+| sign bit inverted before it is propagated.
+*----------------------------------------------------------------------------*/
+enum {
+ float_muladd_negate_c = 1,
+ float_muladd_negate_product = 2,
+ float_muladd_negate_result = 4,
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 STATUS_PARAM );
+float64 int32_to_float64( int32 STATUS_PARAM );
+float32 uint32_to_float32( uint32 STATUS_PARAM );
+float64 uint32_to_float64( uint32 STATUS_PARAM );
+floatx80 int32_to_floatx80( int32 STATUS_PARAM );
+float128 int32_to_float128( int32 STATUS_PARAM );
+float32 int64_to_float32( int64 STATUS_PARAM );
+float32 uint64_to_float32( uint64 STATUS_PARAM );
+float64 int64_to_float64( int64 STATUS_PARAM );
+float64 uint64_to_float64( uint64 STATUS_PARAM );
+floatx80 int64_to_floatx80( int64 STATUS_PARAM );
+float128 int64_to_float128( int64 STATUS_PARAM );
+float128 uint64_to_float128( uint64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision conversion routines.
+*----------------------------------------------------------------------------*/
+float16 float32_to_float16( float32, flag STATUS_PARAM );
+float32 float16_to_float32( float16, flag STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision operations.
+*----------------------------------------------------------------------------*/
+int float16_is_quiet_nan( float16 );
+int float16_is_signaling_nan( float16 );
+float16 float16_maybe_silence_nan( float16 );
+
+INLINE int float16_is_any_nan(float16 a)
+{
+ return ((float16_val(a) & ~0x8000) > 0x7c00);
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float16 float16_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
+uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
+int32 float32_to_int32( float32 STATUS_PARAM );
+int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+uint32 float32_to_uint32( float32 STATUS_PARAM );
+uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64 float32_to_int64( float32 STATUS_PARAM );
+int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+float64 float32_to_float64( float32 STATUS_PARAM );
+floatx80 float32_to_floatx80( float32 STATUS_PARAM );
+float128 float32_to_float128( float32 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM );
+float32 float32_add( float32, float32 STATUS_PARAM );
+float32 float32_sub( float32, float32 STATUS_PARAM );
+float32 float32_mul( float32, float32 STATUS_PARAM );
+float32 float32_div( float32, float32 STATUS_PARAM );
+float32 float32_rem( float32, float32 STATUS_PARAM );
+float32 float32_muladd(float32, float32, float32, int STATUS_PARAM);
+float32 float32_sqrt( float32 STATUS_PARAM );
+float32 float32_exp2( float32 STATUS_PARAM );
+float32 float32_log2( float32 STATUS_PARAM );
+int float32_eq( float32, float32 STATUS_PARAM );
+int float32_le( float32, float32 STATUS_PARAM );
+int float32_lt( float32, float32 STATUS_PARAM );
+int float32_unordered( float32, float32 STATUS_PARAM );
+int float32_eq_quiet( float32, float32 STATUS_PARAM );
+int float32_le_quiet( float32, float32 STATUS_PARAM );
+int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_unordered_quiet( float32, float32 STATUS_PARAM );
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+float32 float32_min(float32, float32 STATUS_PARAM);
+float32 float32_max(float32, float32 STATUS_PARAM);
+int float32_is_quiet_nan( float32 );
+int float32_is_signaling_nan( float32 );
+float32 float32_maybe_silence_nan( float32 );
+float32 float32_scalbn( float32, int STATUS_PARAM );
+
+INLINE float32 float32_abs(float32 a)
+{
+ /* Note that abs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float32(float32_val(a) & 0x7fffffff);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+ /* Note that chs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float32(float32_val(a) ^ 0x80000000);
+}
+
+INLINE int float32_is_infinity(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0x7f800000;
+}
+
+INLINE int float32_is_neg(float32 a)
+{
+ return float32_val(a) >> 31;
+}
+
+INLINE int float32_is_zero(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0;
+}
+
+INLINE int float32_is_any_nan(float32 a)
+{
+ return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL);
+}
+
+INLINE int float32_is_zero_or_denormal(float32 a)
+{
+ return (float32_val(a) & 0x7f800000) == 0;
+}
+
+INLINE float32 float32_set_sign(float32 a, int sign)
+{
+ return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
+}
+
+#define float32_zero make_float32(0)
+#define float32_one make_float32(0x3f800000)
+#define float32_ln2 make_float32(0x3f317218)
+#define float32_pi make_float32(0x40490fdb)
+#define float32_half make_float32(0x3f000000)
+#define float32_infinity make_float32(0x7f800000)
+
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float32 float32_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
+uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
+int32 float64_to_int32( float64 STATUS_PARAM );
+int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+uint32 float64_to_uint32( float64 STATUS_PARAM );
+uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64 float64_to_int64( float64 STATUS_PARAM );
+int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64 float64_to_uint64 (float64 a STATUS_PARAM);
+uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
+float32 float64_to_float32( float64 STATUS_PARAM );
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+float128 float64_to_float128( float64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+float64 float64_add( float64, float64 STATUS_PARAM );
+float64 float64_sub( float64, float64 STATUS_PARAM );
+float64 float64_mul( float64, float64 STATUS_PARAM );
+float64 float64_div( float64, float64 STATUS_PARAM );
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_muladd(float64, float64, float64, int STATUS_PARAM);
+float64 float64_sqrt( float64 STATUS_PARAM );
+float64 float64_log2( float64 STATUS_PARAM );
+int float64_eq( float64, float64 STATUS_PARAM );
+int float64_le( float64, float64 STATUS_PARAM );
+int float64_lt( float64, float64 STATUS_PARAM );
+int float64_unordered( float64, float64 STATUS_PARAM );
+int float64_eq_quiet( float64, float64 STATUS_PARAM );
+int float64_le_quiet( float64, float64 STATUS_PARAM );
+int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_unordered_quiet( float64, float64 STATUS_PARAM );
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+float64 float64_min(float64, float64 STATUS_PARAM);
+float64 float64_max(float64, float64 STATUS_PARAM);
+int float64_is_quiet_nan( float64 a );
+int float64_is_signaling_nan( float64 );
+float64 float64_maybe_silence_nan( float64 );
+float64 float64_scalbn( float64, int STATUS_PARAM );
+
+INLINE float64 float64_abs(float64 a)
+{
+ /* Note that abs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+ /* Note that chs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float64(float64_val(a) ^ 0x8000000000000000LL);
+}
+
+INLINE int float64_is_infinity(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL;
+}
+
+INLINE int float64_is_neg(float64 a)
+{
+ return float64_val(a) >> 63;
+}
+
+INLINE int float64_is_zero(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
+}
+
+INLINE int float64_is_any_nan(float64 a)
+{
+ return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
+}
+
+INLINE int float64_is_zero_or_denormal(float64 a)
+{
+ return (float64_val(a) & 0x7ff0000000000000LL) == 0;
+}
+
+INLINE float64 float64_set_sign(float64 a, int sign)
+{
+ return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
+ | ((int64_t)sign << 63));
+}
+
+#define float64_zero make_float64(0)
+#define float64_one make_float64(0x3ff0000000000000LL)
+#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+#define float64_pi make_float64(0x400921fb54442d18LL)
+#define float64_half make_float64(0x3fe0000000000000LL)
+#define float64_infinity make_float64(0x7ff0000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float64 float64_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 STATUS_PARAM );
+int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_quiet_nan( floatx80 );
+int floatx80_is_signaling_nan( floatx80 );
+floatx80 floatx80_maybe_silence_nan( floatx80 );
+floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+ a.high &= 0x7fff;
+ return a;
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+ a.high ^= 0x8000;
+ return a;
+}
+
+INLINE int floatx80_is_infinity(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL;
+}
+
+INLINE int floatx80_is_neg(floatx80 a)
+{
+ return a.high >> 15;
+}
+
+INLINE int floatx80_is_zero(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0 && a.low == 0;
+}
+
+INLINE int floatx80_is_zero_or_denormal(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0;
+}
+
+INLINE int floatx80_is_any_nan(floatx80 a)
+{
+ return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
+}
+
+#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
+#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
+#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
+#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
+#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
+#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const floatx80 floatx80_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 STATUS_PARAM );
+int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64 float128_to_int64( float128 STATUS_PARAM );
+int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+float32 float128_to_float32( float128 STATUS_PARAM );
+float64 float128_to_float64( float128 STATUS_PARAM );
+floatx80 float128_to_floatx80( float128 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 STATUS_PARAM );
+float128 float128_add( float128, float128 STATUS_PARAM );
+float128 float128_sub( float128, float128 STATUS_PARAM );
+float128 float128_mul( float128, float128 STATUS_PARAM );
+float128 float128_div( float128, float128 STATUS_PARAM );
+float128 float128_rem( float128, float128 STATUS_PARAM );
+float128 float128_sqrt( float128 STATUS_PARAM );
+int float128_eq( float128, float128 STATUS_PARAM );
+int float128_le( float128, float128 STATUS_PARAM );
+int float128_lt( float128, float128 STATUS_PARAM );
+int float128_unordered( float128, float128 STATUS_PARAM );
+int float128_eq_quiet( float128, float128 STATUS_PARAM );
+int float128_le_quiet( float128, float128 STATUS_PARAM );
+int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_unordered_quiet( float128, float128 STATUS_PARAM );
+int float128_compare( float128, float128 STATUS_PARAM );
+int float128_compare_quiet( float128, float128 STATUS_PARAM );
+int float128_is_quiet_nan( float128 );
+int float128_is_signaling_nan( float128 );
+float128 float128_maybe_silence_nan( float128 );
+float128 float128_scalbn( float128, int STATUS_PARAM );
+
+INLINE float128 float128_abs(float128 a)
+{
+ a.high &= 0x7fffffffffffffffLL;
+ return a;
+}
+
+INLINE float128 float128_chs(float128 a)
+{
+ a.high ^= 0x8000000000000000LL;
+ return a;
+}
+
+INLINE int float128_is_infinity(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0;
+}
+
+INLINE int float128_is_neg(float128 a)
+{
+ return a.high >> 63;
+}
+
+INLINE int float128_is_zero(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
+}
+
+INLINE int float128_is_zero_or_denormal(float128 a)
+{
+ return (a.high & 0x7fff000000000000LL) == 0;
+}
+
+INLINE int float128_is_any_nan(float128 a)
+{
+ return ((a.high >> 48) & 0x7fff) == 0x7fff &&
+ ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
+}
+
+#define float128_zero make_float128(0, 0)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* !SOFTFLOAT_H */
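Editorial note, not part of the patch: a minimal sketch of the calling convention implied by STATUS_PARAM above. Every arithmetic routine threads a float_status through, and exception flags accumulate in it; the input bit patterns here are arbitrary.

static float32 add_and_check(uint32_t a_bits, uint32_t b_bits)
{
    float_status status = { 0 };
    float32 a = make_float32(a_bits);
    float32 b = make_float32(b_bits);
    float32 sum;

    set_float_rounding_mode(float_round_nearest_even, &status);
    sum = float32_add(a, b, &status);

    if (get_float_exception_flags(&status) & float_flag_overflow) {
        /* overflow occurred; sum holds the rounded result (typically infinity) */
    }
    return sum;
}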
diff --git a/contrib/qemu/include/glib-compat.h b/contrib/qemu/include/glib-compat.h
new file mode 100644
index 000000000..8aa77afd6
--- /dev/null
+++ b/contrib/qemu/include/glib-compat.h
@@ -0,0 +1,27 @@
+/*
+ * GLIB Compatibility Functions
+ *
+ * Copyright IBM, Corp. 2013
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_GLIB_COMPAT_H
+#define QEMU_GLIB_COMPAT_H
+
+#include <glib.h>
+
+#if !GLIB_CHECK_VERSION(2, 14, 0)
+static inline guint g_timeout_add_seconds(guint interval, GSourceFunc function,
+ gpointer data)
+{
+ return g_timeout_add(interval * 1000, function, data);
+}
+#endif
+
+#endif
diff --git a/contrib/qemu/include/migration/migration.h b/contrib/qemu/include/migration/migration.h
new file mode 100644
index 000000000..bc9fde0b2
--- /dev/null
+++ b/contrib/qemu/include/migration/migration.h
@@ -0,0 +1,157 @@
+/*
+ * QEMU live migration
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MIGRATION_H
+#define QEMU_MIGRATION_H
+
+#include "qapi/qmp/qdict.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/notify.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "qapi-types.h"
+#include "exec/cpu-common.h"
+
+struct MigrationParams {
+ bool blk;
+ bool shared;
+};
+
+typedef struct MigrationState MigrationState;
+
+struct MigrationState
+{
+ int64_t bandwidth_limit;
+ size_t bytes_xfer;
+ size_t xfer_limit;
+ QemuThread thread;
+ QEMUBH *cleanup_bh;
+ QEMUFile *file;
+
+ int state;
+ MigrationParams params;
+ double mbps;
+ int64_t total_time;
+ int64_t downtime;
+ int64_t expected_downtime;
+ int64_t dirty_pages_rate;
+ int64_t dirty_bytes_rate;
+ bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+ int64_t xbzrle_cache_size;
+};
+
+void process_incoming_migration(QEMUFile *f);
+
+void qemu_start_incoming_migration(const char *uri, Error **errp);
+
+uint64_t migrate_max_downtime(void);
+
+void do_info_migrate_print(Monitor *mon, const QObject *data);
+
+void do_info_migrate(Monitor *mon, QObject **ret_data);
+
+void exec_start_incoming_migration(const char *host_port, Error **errp);
+
+void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+void tcp_start_incoming_migration(const char *host_port, Error **errp);
+
+void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+void unix_start_incoming_migration(const char *path, Error **errp);
+
+void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
+
+void fd_start_incoming_migration(const char *path, Error **errp);
+
+void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
+
+void migrate_fd_error(MigrationState *s);
+
+void migrate_fd_connect(MigrationState *s);
+
+int migrate_fd_close(MigrationState *s);
+
+void add_migration_state_change_notifier(Notifier *notify);
+void remove_migration_state_change_notifier(Notifier *notify);
+bool migration_is_active(MigrationState *);
+bool migration_has_finished(MigrationState *);
+bool migration_has_failed(MigrationState *);
+MigrationState *migrate_get_current(void);
+
+uint64_t ram_bytes_remaining(void);
+uint64_t ram_bytes_transferred(void);
+uint64_t ram_bytes_total(void);
+
+void acct_update_position(QEMUFile *f, size_t size, bool zero);
+
+extern SaveVMHandlers savevm_ram_handlers;
+
+uint64_t dup_mig_bytes_transferred(void);
+uint64_t dup_mig_pages_transferred(void);
+uint64_t skipped_mig_bytes_transferred(void);
+uint64_t skipped_mig_pages_transferred(void);
+uint64_t norm_mig_bytes_transferred(void);
+uint64_t norm_mig_pages_transferred(void);
+uint64_t xbzrle_mig_bytes_transferred(void);
+uint64_t xbzrle_mig_pages_transferred(void);
+uint64_t xbzrle_mig_pages_overflow(void);
+uint64_t xbzrle_mig_pages_cache_miss(void);
+
+/**
+ * @migrate_add_blocker - prevent migration from proceeding
+ *
+ * @reason - an error to be returned whenever migration is attempted
+ */
+void migrate_add_blocker(Error *reason);
+
+/**
+ * @migrate_del_blocker - remove a blocking error from migration
+ *
+ * @reason - the error blocking migration
+ */
+void migrate_del_blocker(Error *reason);
+
+bool migrate_rdma_pin_all(void);
+
+bool migrate_auto_converge(void);
+
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
+int64_t xbzrle_cache_resize(int64_t new_size);
+
+void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
+void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
+void ram_control_load_hook(QEMUFile *f, uint64_t flags);
+
+/* Whenever this is found in the data stream, the flags
+ * will be passed to ram_control_load_hook on the incoming-migration
+ * side. This lets before_ram_iterate/after_ram_iterate add
+ * transport-specific sections to the RAM migration data.
+ */
+#define RAM_SAVE_FLAG_HOOK 0x80
+
+#define RAM_SAVE_CONTROL_NOT_SUPP -1000
+#define RAM_SAVE_CONTROL_DELAYED -2000
+
+size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+ ram_addr_t offset, size_t size,
+ int *bytes_sent);
+
+#endif
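Editorial note, not part of the patch: a hedged sketch of the blocker API documented above. error_setg()/error_free() come from qapi/error.h (already included by this header); the message text and the busy/idle trigger are illustrative.

static Error *mig_blocker;

static void block_migration_while_busy(bool busy)
{
    if (busy && !mig_blocker) {
        error_setg(&mig_blocker, "device is busy and cannot be migrated");
        migrate_add_blocker(mig_blocker);   /* returned to anyone attempting migration */
    } else if (!busy && mig_blocker) {
        migrate_del_blocker(mig_blocker);
        error_free(mig_blocker);
        mig_blocker = NULL;
    }
}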
diff --git a/contrib/qemu/include/migration/qemu-file.h b/contrib/qemu/include/migration/qemu-file.h
new file mode 100644
index 000000000..0f757fbeb
--- /dev/null
+++ b/contrib/qemu/include/migration/qemu-file.h
@@ -0,0 +1,266 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_FILE_H
+#define QEMU_FILE_H 1
+#include "exec/cpu-common.h"
+
+/* This function writes a chunk of data to a file at the given position.
+ * The pos argument can be ignored if the file is only being used for
+ * streaming. The handler should try to write all of the data it can.
+ */
+typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
+ int64_t pos, int size);
+
+/* Read a chunk of data from a file at the given position. The pos argument
+ * can be ignored if the file is only being used for streaming. The number of
+ * bytes actually read should be returned.
+ */
+typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
+ int64_t pos, int size);
+
+/* Close a file
+ *
+ * Return negative error number on error, 0 or positive value on success.
+ *
+ * The meaning of the return value on success depends on the specific back-end being
+ * used.
+ */
+typedef int (QEMUFileCloseFunc)(void *opaque);
+
+/* Called to return the OS file descriptor associated with the QEMUFile.
+ */
+typedef int (QEMUFileGetFD)(void *opaque);
+
+/*
+ * This function writes an iovec to file.
+ */
+typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos);
+
+/*
+ * This function provides hooks around different
+ * stages of RAM migration.
+ */
+typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
+
+/*
+ * Constants used by ram_control_* hooks
+ */
+#define RAM_CONTROL_SETUP 0
+#define RAM_CONTROL_ROUND 1
+#define RAM_CONTROL_HOOK 2
+#define RAM_CONTROL_FINISH 3
+
+/*
+ * This function allows overriding where the RAM page
+ * is saved (to RDMA, for example).
+ */
+typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
+ ram_addr_t block_offset,
+ ram_addr_t offset,
+ size_t size,
+ int *bytes_sent);
+
+typedef struct QEMUFileOps {
+ QEMUFilePutBufferFunc *put_buffer;
+ QEMUFileGetBufferFunc *get_buffer;
+ QEMUFileCloseFunc *close;
+ QEMUFileGetFD *get_fd;
+ QEMUFileWritevBufferFunc *writev_buffer;
+ QEMURamHookFunc *before_ram_iterate;
+ QEMURamHookFunc *after_ram_iterate;
+ QEMURamHookFunc *hook_ram_load;
+ QEMURamSaveFunc *save_page;
+} QEMUFileOps;
+
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
+QEMUFile *qemu_fopen(const char *filename, const char *mode);
+QEMUFile *qemu_fdopen(int fd, const char *mode);
+QEMUFile *qemu_fopen_socket(int fd, const char *mode);
+QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
+int qemu_get_fd(QEMUFile *f);
+int qemu_fclose(QEMUFile *f);
+int64_t qemu_ftell(QEMUFile *f);
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
+void qemu_put_byte(QEMUFile *f, int v);
+/*
+ * put_buffer without copying the buffer.
+ * The buffer must remain valid until it has been sent asynchronously.
+ */
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
+bool qemu_file_mode_is_not_valid(const char *mode);
+
+static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
+{
+ qemu_put_byte(f, (int)v);
+}
+
+#define qemu_put_sbyte qemu_put_byte
+
+void qemu_put_be16(QEMUFile *f, unsigned int v);
+void qemu_put_be32(QEMUFile *f, unsigned int v);
+void qemu_put_be64(QEMUFile *f, uint64_t v);
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
+int qemu_get_byte(QEMUFile *f);
+void qemu_update_position(QEMUFile *f, size_t size);
+
+static inline unsigned int qemu_get_ubyte(QEMUFile *f)
+{
+ return (unsigned int)qemu_get_byte(f);
+}
+
+#define qemu_get_sbyte qemu_get_byte
+
+unsigned int qemu_get_be16(QEMUFile *f);
+unsigned int qemu_get_be32(QEMUFile *f);
+uint64_t qemu_get_be64(QEMUFile *f);
+
+int qemu_file_rate_limit(QEMUFile *f);
+void qemu_file_reset_rate_limit(QEMUFile *f);
+void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
+int64_t qemu_file_get_rate_limit(QEMUFile *f);
+int qemu_file_get_error(QEMUFile *f);
+void qemu_fflush(QEMUFile *f);
+
+static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
+{
+ qemu_put_be64(f, *pv);
+}
+
+static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
+{
+ qemu_put_be32(f, *pv);
+}
+
+static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
+{
+ qemu_put_be16(f, *pv);
+}
+
+static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
+{
+ qemu_put_byte(f, *pv);
+}
+
+static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
+{
+ *pv = qemu_get_be64(f);
+}
+
+static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
+{
+ *pv = qemu_get_be32(f);
+}
+
+static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv)
+{
+ *pv = qemu_get_be16(f);
+}
+
+static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv)
+{
+ *pv = qemu_get_byte(f);
+}
+
+// Signed versions for type safety
+static inline void qemu_put_sbuffer(QEMUFile *f, const int8_t *buf, int size)
+{
+ qemu_put_buffer(f, (const uint8_t *)buf, size);
+}
+
+static inline void qemu_put_sbe16(QEMUFile *f, int v)
+{
+ qemu_put_be16(f, (unsigned int)v);
+}
+
+static inline void qemu_put_sbe32(QEMUFile *f, int v)
+{
+ qemu_put_be32(f, (unsigned int)v);
+}
+
+static inline void qemu_put_sbe64(QEMUFile *f, int64_t v)
+{
+ qemu_put_be64(f, (uint64_t)v);
+}
+
+static inline size_t qemu_get_sbuffer(QEMUFile *f, int8_t *buf, int size)
+{
+ return qemu_get_buffer(f, (uint8_t *)buf, size);
+}
+
+static inline int qemu_get_sbe16(QEMUFile *f)
+{
+ return (int)qemu_get_be16(f);
+}
+
+static inline int qemu_get_sbe32(QEMUFile *f)
+{
+ return (int)qemu_get_be32(f);
+}
+
+static inline int64_t qemu_get_sbe64(QEMUFile *f)
+{
+ return (int64_t)qemu_get_be64(f);
+}
+
+static inline void qemu_put_s8s(QEMUFile *f, const int8_t *pv)
+{
+ qemu_put_8s(f, (const uint8_t *)pv);
+}
+
+static inline void qemu_put_sbe16s(QEMUFile *f, const int16_t *pv)
+{
+ qemu_put_be16s(f, (const uint16_t *)pv);
+}
+
+static inline void qemu_put_sbe32s(QEMUFile *f, const int32_t *pv)
+{
+ qemu_put_be32s(f, (const uint32_t *)pv);
+}
+
+static inline void qemu_put_sbe64s(QEMUFile *f, const int64_t *pv)
+{
+ qemu_put_be64s(f, (const uint64_t *)pv);
+}
+
+static inline void qemu_get_s8s(QEMUFile *f, int8_t *pv)
+{
+ qemu_get_8s(f, (uint8_t *)pv);
+}
+
+static inline void qemu_get_sbe16s(QEMUFile *f, int16_t *pv)
+{
+ qemu_get_be16s(f, (uint16_t *)pv);
+}
+
+static inline void qemu_get_sbe32s(QEMUFile *f, int32_t *pv)
+{
+ qemu_get_be32s(f, (uint32_t *)pv);
+}
+
+static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
+{
+ qemu_get_be64s(f, (uint64_t *)pv);
+}
+#endif
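Editorial note, not part of the patch: a small sketch of the symmetric put/get helpers above, assuming a QEMUFile opened elsewhere (for example with qemu_fopen()); the record layout and version number are made up for illustration.

/* Writer side: emit a tiny versioned record. */
static void put_record(QEMUFile *f, const uint8_t *payload, uint32_t len)
{
    qemu_put_be32(f, 1);                 /* hypothetical record version */
    qemu_put_be32(f, len);
    qemu_put_buffer(f, payload, len);
}

/* Reader side: must mirror the writer exactly, field for field. */
static int get_record(QEMUFile *f, uint8_t *payload, uint32_t max_len)
{
    uint32_t version = qemu_get_be32(f);
    uint32_t len = qemu_get_be32(f);

    if (version != 1 || len > max_len) {
        return -1;
    }
    qemu_get_buffer(f, payload, len);
    return qemu_file_get_error(f);       /* 0 on success, negative errno on I/O error */
}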
diff --git a/contrib/qemu/include/migration/vmstate.h b/contrib/qemu/include/migration/vmstate.h
new file mode 100644
index 000000000..1c31b5d6f
--- /dev/null
+++ b/contrib/qemu/include/migration/vmstate.h
@@ -0,0 +1,740 @@
+/*
+ * QEMU migration/snapshot declarations
+ *
+ * Copyright (c) 2009-2011 Red Hat, Inc.
+ *
+ * Original author: Juan Quintela <quintela@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_VMSTATE_H
+#define QEMU_VMSTATE_H 1
+
+#ifndef CONFIG_USER_ONLY
+#include <migration/qemu-file.h>
+#endif
+
+typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+
+typedef struct SaveVMHandlers {
+ /* This runs inside the iothread lock. */
+ void (*set_params)(const MigrationParams *params, void * opaque);
+ SaveStateHandler *save_state;
+
+ void (*cancel)(void *opaque);
+ int (*save_live_complete)(QEMUFile *f, void *opaque);
+
+ /* This runs both outside and inside the iothread lock. */
+ bool (*is_active)(void *opaque);
+
+ /* This runs outside the iothread lock in the migration case, and
+ * within the lock in the savevm case. The callback had better only
+ * use data that is local to the migration thread or protected
+ * by other locks.
+ */
+ int (*save_live_iterate)(QEMUFile *f, void *opaque);
+
+ /* This runs outside the iothread lock! */
+ int (*save_live_setup)(QEMUFile *f, void *opaque);
+ uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
+
+ LoadStateHandler *load_state;
+} SaveVMHandlers;
+
+int register_savevm(DeviceState *dev,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque);
+
+int register_savevm_live(DeviceState *dev,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveVMHandlers *ops,
+ void *opaque);
+
+void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque);
+void register_device_unmigratable(DeviceState *dev, const char *idstr,
+ void *opaque);
+
+
+typedef struct VMStateInfo VMStateInfo;
+typedef struct VMStateDescription VMStateDescription;
+
+struct VMStateInfo {
+ const char *name;
+ int (*get)(QEMUFile *f, void *pv, size_t size);
+ void (*put)(QEMUFile *f, void *pv, size_t size);
+};
+
+enum VMStateFlags {
+ VMS_SINGLE = 0x001,
+ VMS_POINTER = 0x002,
+ VMS_ARRAY = 0x004,
+ VMS_STRUCT = 0x008,
+ VMS_VARRAY_INT32 = 0x010, /* Array with size in int32_t field*/
+ VMS_BUFFER = 0x020, /* static sized buffer */
+ VMS_ARRAY_OF_POINTER = 0x040,
+ VMS_VARRAY_UINT16 = 0x080, /* Array with size in uint16_t field */
+ VMS_VBUFFER = 0x100, /* Buffer with size in int32_t field */
+ VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */
+ VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/
+ VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/
+};
+
+typedef struct {
+ const char *name;
+ size_t offset;
+ size_t size;
+ size_t start;
+ int num;
+ size_t num_offset;
+ size_t size_offset;
+ const VMStateInfo *info;
+ enum VMStateFlags flags;
+ const VMStateDescription *vmsd;
+ int version_id;
+ bool (*field_exists)(void *opaque, int version_id);
+} VMStateField;
+
+typedef struct VMStateSubsection {
+ const VMStateDescription *vmsd;
+ bool (*needed)(void *opaque);
+} VMStateSubsection;
+
+struct VMStateDescription {
+ const char *name;
+ int unmigratable;
+ int version_id;
+ int minimum_version_id;
+ int minimum_version_id_old;
+ LoadStateHandler *load_state_old;
+ int (*pre_load)(void *opaque);
+ int (*post_load)(void *opaque, int version_id);
+ void (*pre_save)(void *opaque);
+ VMStateField *fields;
+ const VMStateSubsection *subsections;
+};
+
+#ifdef CONFIG_USER_ONLY
+extern const VMStateDescription vmstate_dummy;
+#endif
+
+extern const VMStateInfo vmstate_info_bool;
+
+extern const VMStateInfo vmstate_info_int8;
+extern const VMStateInfo vmstate_info_int16;
+extern const VMStateInfo vmstate_info_int32;
+extern const VMStateInfo vmstate_info_int64;
+
+extern const VMStateInfo vmstate_info_uint8_equal;
+extern const VMStateInfo vmstate_info_uint16_equal;
+extern const VMStateInfo vmstate_info_int32_equal;
+extern const VMStateInfo vmstate_info_uint32_equal;
+extern const VMStateInfo vmstate_info_uint64_equal;
+extern const VMStateInfo vmstate_info_int32_le;
+
+extern const VMStateInfo vmstate_info_uint8;
+extern const VMStateInfo vmstate_info_uint16;
+extern const VMStateInfo vmstate_info_uint32;
+extern const VMStateInfo vmstate_info_uint64;
+
+extern const VMStateInfo vmstate_info_float64;
+
+extern const VMStateInfo vmstate_info_timer;
+extern const VMStateInfo vmstate_info_buffer;
+extern const VMStateInfo vmstate_info_unused_buffer;
+extern const VMStateInfo vmstate_info_bitmap;
+
+#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
+#define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
+#define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0)
+
+#define vmstate_offset_value(_state, _field, _type) \
+ (offsetof(_state, _field) + \
+ type_check(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_pointer(_state, _field, _type) \
+ (offsetof(_state, _field) + \
+ type_check_pointer(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_array(_state, _field, _type, _num) \
+ (offsetof(_state, _field) + \
+ type_check_array(_type, typeof_field(_state, _field), _num))
+
+#define vmstate_offset_2darray(_state, _field, _type, _n1, _n2) \
+ (offsetof(_state, _field) + \
+ type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2))
+
+#define vmstate_offset_sub_array(_state, _field, _type, _start) \
+ (offsetof(_state, _field[_start]))
+
+#define vmstate_offset_buffer(_state, _field) \
+ vmstate_offset_array(_state, _field, uint8_t, \
+ sizeof(typeof_field(_state, _field)))
+
+#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size = sizeof(_type), \
+ .info = &(_info), \
+ .flags = VMS_SINGLE, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_POINTER(_field, _state, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_SINGLE|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_POINTER_TEST(_field, _state, _test, _info, _type) { \
+ .name = (stringify(_field)), \
+ .info = &(_info), \
+ .field_exists = (_test), \
+ .size = sizeof(_type), \
+ .flags = VMS_SINGLE|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_2DARRAY(_field, _state, _n1, _n2, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_n1) * (_n2), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_2darray(_state, _field, _type, _n1, _n2), \
+}
+
+#define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\
+ .name = (stringify(_field)), \
+ .field_exists = (_test), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_SUB_ARRAY(_field, _state, _start, _num, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_sub_array(_state, _field, _type, _start), \
+}
+
+#define VMSTATE_ARRAY_INT32_UNSAFE(_field, _state, _field_num, _info, _type) {\
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_INT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_VARRAY_INT32(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_INT32|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_VARRAY_UINT32(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_UINT32|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_UINT16, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_POINTER_TEST(_field, _state, _test, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .field_exists = (_test), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_ARRAY_OF_POINTER(_field, _state, _num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY|VMS_ARRAY_OF_POINTER, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num = (_num), \
+ .field_exists = (_test), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT8(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_UINT8, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_INT32(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_UINT32 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_INT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_INT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_UINT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size = (_size - _start), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_BUFFER, \
+ .offset = vmstate_offset_buffer(_state, _field) + _start, \
+}
+
+#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\
+ .size = (_multiply), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_VBUFFER(_field, _state, _version, _test, _start, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _start, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &(_info), \
+ .flags = VMS_BUFFER, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_BUFFER_POINTER_UNSAFE(_field, _state, _version, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_BUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) { \
+ .name = "unused", \
+ .field_exists = (_test), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_unused_buffer, \
+ .flags = VMS_BUFFER, \
+}
+
+/* _field_size should be an int32_t field in the _state struct giving the
+ * size of the bitmap _field in bits.
+ */
+#define VMSTATE_BITMAP(_field, _state, _version, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\
+ .info = &vmstate_info_bitmap, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+}
+
+/* _f   : field name
+   _f_n : name of the field holding the number of elements
+   _n   : number of elements
+   _s   : struct state name
+   _v   : version
+*/
+
+#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \
+ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type)
+
+#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \
+ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type)
+
+#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \
+ VMSTATE_STRUCT_POINTER_TEST(_field, _state, NULL, _vmsd, _type)
+
+#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \
+ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, \
+ _vmsd, _type)
+
+#define VMSTATE_BOOL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_INT8_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int8, int8_t)
+#define VMSTATE_INT16_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int16, int16_t)
+#define VMSTATE_INT32_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int32, int32_t)
+#define VMSTATE_INT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_UINT8_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, uint8_t)
+#define VMSTATE_UINT16_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, uint16_t)
+#define VMSTATE_UINT32_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, uint32_t)
+#define VMSTATE_UINT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_BOOL(_f, _s) \
+ VMSTATE_BOOL_V(_f, _s, 0)
+
+#define VMSTATE_INT8(_f, _s) \
+ VMSTATE_INT8_V(_f, _s, 0)
+#define VMSTATE_INT16(_f, _s) \
+ VMSTATE_INT16_V(_f, _s, 0)
+#define VMSTATE_INT32(_f, _s) \
+ VMSTATE_INT32_V(_f, _s, 0)
+#define VMSTATE_INT64(_f, _s) \
+ VMSTATE_INT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8(_f, _s) \
+ VMSTATE_UINT8_V(_f, _s, 0)
+#define VMSTATE_UINT16(_f, _s) \
+ VMSTATE_UINT16_V(_f, _s, 0)
+#define VMSTATE_UINT32(_f, _s) \
+ VMSTATE_UINT32_V(_f, _s, 0)
+#define VMSTATE_UINT64(_f, _s) \
+ VMSTATE_UINT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t)
+
+#define VMSTATE_UINT16_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_INT32_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t)
+
+#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t)
+
+#define VMSTATE_UINT32_EQUAL(_f, _s) \
+ VMSTATE_UINT32_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t)
+
+#define VMSTATE_UINT64_EQUAL(_f, _s) \
+ VMSTATE_UINT64_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_INT32_LE(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t)
+
+#define VMSTATE_UINT8_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT16_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT32_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t)
+
+
+#define VMSTATE_FLOAT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64(_f, _s) \
+ VMSTATE_FLOAT64_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_TEST(_f, _s, _test) \
+ VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER_V(_f, _s, _v) \
+ VMSTATE_POINTER(_f, _s, _v, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER(_f, _s) \
+ VMSTATE_TIMER_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_ARRAY(_f, _s, _n) \
+ VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_BOOL_ARRAY(_f, _s, _n) \
+ VMSTATE_BOOL_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, _v) \
+ VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_2DARRAY(_f, _s, _n1, _n2) \
+ VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, _v) \
+ VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT8_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT8_2DARRAY(_f, _s, _n1, _n2) \
+ VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_UINT32_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_UINT64_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int16, int16_t)
+
+#define VMSTATE_INT16_ARRAY(_f, _s, _n) \
+ VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT32_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int32, int32_t)
+
+#define VMSTATE_INT32_ARRAY(_f, _s, _n) \
+ VMSTATE_INT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \
+ VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_INT64_ARRAY(_f, _s, _n) \
+ VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n) \
+ VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_BUFFER_V(_f, _s, _v) \
+ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER(_f, _s) \
+ VMSTATE_BUFFER_V(_f, _s, 0)
+
+#define VMSTATE_PARTIAL_BUFFER(_f, _s, _size) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_BUFFER_START_MIDDLE(_f, _s, _start) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, _start, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size) \
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size) \
+ VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_SUB_VBUFFER(_f, _s, _start, _size) \
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, _start, _size)
+
+#define VMSTATE_BUFFER_TEST(_f, _s, _test) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size) \
+ VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size)
+
+#define VMSTATE_UNUSED_V(_v, _size) \
+ VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
+
+#define VMSTATE_UNUSED(_size) \
+ VMSTATE_UNUSED_V(0, _size)
+
+#define VMSTATE_UNUSED_TEST(_test, _size) \
+ VMSTATE_UNUSED_BUFFER(_test, 0, _size)
+
+#define VMSTATE_END_OF_LIST() \
+ {}
+
+int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
+ void *opaque, int version_id);
+void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
+ void *opaque);
+
+int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
+ const VMStateDescription *vmsd,
+ void *base, int alias_id,
+ int required_for_version);
+
+static inline int vmstate_register(DeviceState *dev, int instance_id,
+ const VMStateDescription *vmsd,
+ void *opaque)
+{
+ return vmstate_register_with_alias_id(dev, instance_id, vmsd,
+ opaque, -1, 0);
+}
+
+void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
+ void *opaque);
+
+struct MemoryRegion;
+void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_register_ram_global(struct MemoryRegion *memory);
+
+#endif
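
The declarative alternative to hand-written save/load handlers is a VMStateDescription built from the macros above. The sketch below shows a minimal field list and registration; MyDevState, vmstate_my_dev and my_dev_realize are illustrative names, reusing the same hypothetical device as the qemu-file.h sketch earlier in this patch.

/* Minimal sketch: each VMSTATE_* macro expands to a VMStateField
 * initializer and VMSTATE_END_OF_LIST() terminates the array.  All
 * identifiers except the macros and vmstate_register() are illustrative. */
typedef struct MyDevState {
    uint32_t ctrl;
    uint16_t irq_level;
    uint8_t  buf[64];
} MyDevState;

static const VMStateDescription vmstate_my_dev = {
    .name = "my-dev",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(ctrl, MyDevState),
        VMSTATE_UINT16(irq_level, MyDevState),
        VMSTATE_UINT8_ARRAY(buf, MyDevState, 64),
        VMSTATE_END_OF_LIST()
    }
};

static void my_dev_realize(DeviceState *dev, MyDevState *s)
{
    /* instance_id -1 lets the core pick a free id for this idstr */
    vmstate_register(dev, -1, &vmstate_my_dev, s);
}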
diff --git a/contrib/qemu/include/monitor/monitor.h b/contrib/qemu/include/monitor/monitor.h
new file mode 100644
index 000000000..1942cc42f
--- /dev/null
+++ b/contrib/qemu/include/monitor/monitor.h
@@ -0,0 +1,104 @@
+#ifndef MONITOR_H
+#define MONITOR_H
+
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "block/block.h"
+#include "monitor/readline.h"
+
+extern Monitor *cur_mon;
+extern Monitor *default_mon;
+
+/* flags for monitor_init */
+#define MONITOR_IS_DEFAULT 0x01
+#define MONITOR_USE_READLINE 0x02
+#define MONITOR_USE_CONTROL 0x04
+#define MONITOR_USE_PRETTY 0x08
+
+/* flags for monitor commands */
+#define MONITOR_CMD_ASYNC 0x0001
+
+/* QMP events */
+typedef enum MonitorEvent {
+ QEVENT_SHUTDOWN,
+ QEVENT_RESET,
+ QEVENT_POWERDOWN,
+ QEVENT_STOP,
+ QEVENT_RESUME,
+ QEVENT_VNC_CONNECTED,
+ QEVENT_VNC_INITIALIZED,
+ QEVENT_VNC_DISCONNECTED,
+ QEVENT_BLOCK_IO_ERROR,
+ QEVENT_RTC_CHANGE,
+ QEVENT_WATCHDOG,
+ QEVENT_SPICE_CONNECTED,
+ QEVENT_SPICE_INITIALIZED,
+ QEVENT_SPICE_DISCONNECTED,
+ QEVENT_BLOCK_JOB_COMPLETED,
+ QEVENT_BLOCK_JOB_CANCELLED,
+ QEVENT_BLOCK_JOB_ERROR,
+ QEVENT_BLOCK_JOB_READY,
+ QEVENT_DEVICE_DELETED,
+ QEVENT_DEVICE_TRAY_MOVED,
+ QEVENT_NIC_RX_FILTER_CHANGED,
+ QEVENT_SUSPEND,
+ QEVENT_SUSPEND_DISK,
+ QEVENT_WAKEUP,
+ QEVENT_BALLOON_CHANGE,
+ QEVENT_SPICE_MIGRATE_COMPLETED,
+ QEVENT_GUEST_PANICKED,
+
+ /* Add to 'monitor_event_names' array in monitor.c when
+ * defining new events here */
+
+ QEVENT_MAX,
+} MonitorEvent;
+
+int monitor_cur_is_qmp(void);
+
+void monitor_protocol_event(MonitorEvent event, QObject *data);
+void monitor_init(CharDriverState *chr, int flags);
+
+int monitor_suspend(Monitor *mon);
+void monitor_resume(Monitor *mon);
+
+int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
+ BlockDriverCompletionFunc *completion_cb,
+ void *opaque);
+int monitor_read_block_device_key(Monitor *mon, const char *device,
+ BlockDriverCompletionFunc *completion_cb,
+ void *opaque);
+
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp);
+int monitor_handle_fd_param(Monitor *mon, const char *fdname);
+
+void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+ GCC_FMT_ATTR(2, 0);
+void monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+void monitor_print_filename(Monitor *mon, const char *filename);
+void monitor_flush(Monitor *mon);
+int monitor_set_cpu(int cpu_index);
+int monitor_get_cpu_index(void);
+
+typedef void (MonitorCompletion)(void *opaque, QObject *ret_data);
+
+void monitor_set_error(Monitor *mon, QError *qerror);
+void monitor_read_command(Monitor *mon, int show_prompt);
+ReadLineState *monitor_get_rs(Monitor *mon);
+int monitor_read_password(Monitor *mon, ReadLineFunc *readline_func,
+ void *opaque);
+
+int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
+
+int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
+
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+ bool has_opaque, const char *opaque,
+ Error **errp);
+int monitor_fdset_get_fd(int64_t fdset_id, int flags);
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd);
+int monitor_fdset_dup_fd_remove(int dup_fd);
+int monitor_fdset_dup_fd_find(int dup_fd);
+
+#endif /* !MONITOR_H */
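
For orientation, a hypothetical HMP-style command handler using the printing API declared above might look like the sketch below. hmp_show_counter and counter are illustrative names; only monitor_printf() and monitor_cur_is_qmp() come from this header.

/* Hedged sketch: print one line to a human monitor. */
static int counter;

static void hmp_show_counter(Monitor *mon, const QDict *qdict)
{
    if (monitor_cur_is_qmp()) {
        /* a QMP monitor expects structured replies, not free-form text */
        return;
    }
    monitor_printf(mon, "counter = %d\n", counter);
}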
diff --git a/contrib/qemu/include/monitor/readline.h b/contrib/qemu/include/monitor/readline.h
new file mode 100644
index 000000000..fc9806ecf
--- /dev/null
+++ b/contrib/qemu/include/monitor/readline.h
@@ -0,0 +1,55 @@
+#ifndef READLINE_H
+#define READLINE_H
+
+#include "qemu-common.h"
+
+#define READLINE_CMD_BUF_SIZE 4095
+#define READLINE_MAX_CMDS 64
+#define READLINE_MAX_COMPLETIONS 256
+
+typedef void ReadLineFunc(Monitor *mon, const char *str, void *opaque);
+typedef void ReadLineCompletionFunc(const char *cmdline);
+
+typedef struct ReadLineState {
+ char cmd_buf[READLINE_CMD_BUF_SIZE + 1];
+ int cmd_buf_index;
+ int cmd_buf_size;
+
+ char last_cmd_buf[READLINE_CMD_BUF_SIZE + 1];
+ int last_cmd_buf_index;
+ int last_cmd_buf_size;
+
+ int esc_state;
+ int esc_param;
+
+ char *history[READLINE_MAX_CMDS];
+ int hist_entry;
+
+ ReadLineCompletionFunc *completion_finder;
+ char *completions[READLINE_MAX_COMPLETIONS];
+ int nb_completions;
+ int completion_index;
+
+ ReadLineFunc *readline_func;
+ void *readline_opaque;
+ int read_password;
+ char prompt[256];
+ Monitor *mon;
+} ReadLineState;
+
+void readline_add_completion(ReadLineState *rs, const char *str);
+void readline_set_completion_index(ReadLineState *rs, int completion_index);
+
+const char *readline_get_history(ReadLineState *rs, unsigned int index);
+
+void readline_handle_byte(ReadLineState *rs, int ch);
+
+void readline_start(ReadLineState *rs, const char *prompt, int read_password,
+ ReadLineFunc *readline_func, void *opaque);
+void readline_restart(ReadLineState *rs);
+void readline_show_prompt(ReadLineState *rs);
+
+ReadLineState *readline_init(Monitor *mon,
+ ReadLineCompletionFunc *completion_finder);
+
+#endif /* !READLINE_H */
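
The intended flow of this interface is roughly: readline_init() builds the state, readline_start() arms a prompt and a line callback, and each byte read from the underlying character device is pushed through readline_handle_byte(). The sketch below uses the hypothetical names my_completion, my_line_cb and my_setup.

/* Hedged sketch of the readline flow; everything except the readline_*
 * functions and the Monitor/ReadLineState types is illustrative. */
static void my_completion(const char *cmdline)
{
    /* would call readline_add_completion() once per matching candidate */
}

static void my_line_cb(Monitor *mon, const char *line, void *opaque)
{
    /* handle one completed command line here */
}

static ReadLineState *my_setup(Monitor *mon)
{
    ReadLineState *rs = readline_init(mon, my_completion);

    readline_start(rs, "(qemu) ", 0, my_line_cb, NULL);
    readline_show_prompt(rs);
    return rs;
}

/* Later, for each byte received from the character device:
 *     readline_handle_byte(rs, ch);
 */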
diff --git a/contrib/qemu/include/qapi/error.h b/contrib/qemu/include/qapi/error.h
new file mode 100644
index 000000000..ffd1cea47
--- /dev/null
+++ b/contrib/qemu/include/qapi/error.h
@@ -0,0 +1,85 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+#ifndef ERROR_H
+#define ERROR_H
+
+#include "qemu/compiler.h"
+#include "qapi-types.h"
+#include <stdbool.h>
+
+/**
+ * A class representing internal errors within QEMU. An error has an ErrorClass
+ * code and a human message.
+ */
+typedef struct Error Error;
+
+/**
+ * Set an indirect pointer to an error given an ErrorClass value and a
+ * printf-style human message. This function is not meant to be used outside
+ * of QEMU.
+ */
+void error_set(Error **err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(3, 4);
+
+/**
+ * Set an indirect pointer to an error given an ErrorClass value and a
+ * printf-style human message, followed by a strerror() string if
+ * @os_error is not zero.
+ */
+void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+
+/**
+ * Same as error_set(), but sets a generic error
+ */
+#define error_setg(err, fmt, ...) \
+ error_set(err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#define error_setg_errno(err, os_error, fmt, ...) \
+ error_set_errno(err, os_error, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+
+/**
+ * Helper for open() errors
+ */
+void error_setg_file_open(Error **errp, int os_errno, const char *filename);
+
+/**
+ * Returns true if an indirect pointer to an error is pointing to a valid
+ * error object.
+ */
+bool error_is_set(Error **err);
+
+/*
+ * Get the error class of an error object.
+ */
+ErrorClass error_get_class(const Error *err);
+
+/**
+ * Returns an exact copy of the error passed as an argument.
+ */
+Error *error_copy(const Error *err);
+
+/**
+ * Get a human readable representation of an error object.
+ */
+const char *error_get_pretty(Error *err);
+
+/**
+ * Propagate an error to an indirect pointer to an error. This function will
+ * always transfer ownership of the error reference and handles the case where
+ * dst_err is NULL correctly. Errors after the first are discarded.
+ */
+void error_propagate(Error **dst_err, Error *local_err);
+
+/**
+ * Free an error object.
+ */
+void error_free(Error *err);
+
+#endif
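
As a rough illustration of the calling convention these declarations imply: a callee fills an Error ** on failure, and the caller either inspects or propagates it. my_open, my_start and the file path below are illustrative; only the error_* functions come from this header, and the sketch assumes <stdio.h> and <errno.h> are available.

/* Hedged sketch of the Error ** convention: set locally, propagate up. */
static int my_open(const char *filename, Error **errp)
{
    FILE *fp = fopen(filename, "r");

    if (!fp) {
        error_setg_errno(errp, errno, "could not open '%s'", filename);
        return -1;
    }
    fclose(fp);
    return 0;
}

static void my_start(Error **errp)
{
    Error *local_err = NULL;

    if (my_open("/some/config", &local_err) < 0) {
        /* hands ownership of local_err to the caller's errp (may be NULL) */
        error_propagate(errp, local_err);
        return;
    }
}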
diff --git a/contrib/qemu/include/qapi/qmp/json-lexer.h b/contrib/qemu/include/qapi/qmp/json-lexer.h
new file mode 100644
index 000000000..cdff0460a
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-lexer.h
@@ -0,0 +1,51 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_LEXER_H
+#define QEMU_JSON_LEXER_H
+
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qlist.h"
+
+typedef enum json_token_type {
+ JSON_OPERATOR = 100,
+ JSON_INTEGER,
+ JSON_FLOAT,
+ JSON_KEYWORD,
+ JSON_STRING,
+ JSON_ESCAPE,
+ JSON_SKIP,
+ JSON_ERROR,
+} JSONTokenType;
+
+typedef struct JSONLexer JSONLexer;
+
+typedef void (JSONLexerEmitter)(JSONLexer *, QString *, JSONTokenType, int x, int y);
+
+struct JSONLexer
+{
+ JSONLexerEmitter *emit;
+ int state;
+ QString *token;
+ int x, y;
+};
+
+void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func);
+
+int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size);
+
+int json_lexer_flush(JSONLexer *lexer);
+
+void json_lexer_destroy(JSONLexer *lexer);
+
+#endif
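
Driving the lexer looks roughly like the sketch below: an emitter callback receives each token, json_lexer_feed() consumes raw input, and json_lexer_flush() pushes out any token still being assembled. my_emit and lex_example are illustrative names, and <string.h> is assumed for strlen().

/* Hedged sketch; only the json_lexer_* functions and the JSONLexer,
 * QString and JSONTokenType types come from this header. */
static void my_emit(JSONLexer *lexer, QString *token, JSONTokenType type,
                    int x, int y)
{
    /* e.g. hand (type, token) to a streaming JSON parser */
}

static void lex_example(const char *text)
{
    JSONLexer lexer;

    json_lexer_init(&lexer, my_emit);
    json_lexer_feed(&lexer, text, strlen(text));
    json_lexer_flush(&lexer);      /* emit any partially built token */
    json_lexer_destroy(&lexer);
}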
diff --git a/contrib/qemu/include/qapi/qmp/json-parser.h b/contrib/qemu/include/qapi/qmp/json-parser.h
new file mode 100644
index 000000000..44d88f346
--- /dev/null
+++